## Module

In [1]:
import optuna
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import log_loss
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

## Dataset

In [2]:
train = pd.read_csv('../input/titanic/train.csv')
print(train.shape)
train.head()

(891, 12)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
test = pd.read_csv('../input/titanic/test.csv')
print(test.shape)
test.head()

(418, 11)


Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [4]:
submission = pd.read_csv('../input/titanic/gender_submission.csv')
submission.head()

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,1
2,894,0
3,895,0
4,896,1


## Info

In [5]:
train['Survived'].value_counts()

0    549
1    342
Name: Survived, dtype: int64

## Preprocess

In [6]:
features = [
    'Pclass',
    'Sex',
    'Age',
    'SibSp',
    'Parch',
    'Fare',
    'Embarked'
]

target = 'Survived'

In [7]:
categorical_features = ['Sex', 'SibSp', 'Parch', 'Embarked']

for col in categorical_features:
    train[col] = train[col].astype('category')
    test[col] = test[col].astype('category')

In [8]:
X_train = train[features]
y_train = train[target]
X_test = test[features]

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)

(891, 7)
(891,)
(418, 7)


## Params

In [9]:
X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train, test_size=0.3, stratify=y_train, random_state=123)

print(X_tr.shape)
print(y_val.shape)
print(X_tr.shape)
print(y_val.shape)

(623, 7)
(268,)
(623, 7)
(268,)


In [10]:
def objective(trial):
    params = {
        'objective': 'binary',
        'learning_rate': trial.suggest_uniform('learning_rate', 0.05, 0.1),
        'num_leaves': trial.suggest_int('num_leaves', 32, 128),
        'max_depth': trial.suggest_int('max_depth', 3, 9),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.6, 0.95),
        'subsample': trial.suggest_uniform('subsample', 0.6, 0.95)
    }
    
    lgb_train = lgb.Dataset(X_tr, y_tr, categorical_feature=categorical_features)
    lgb_eval = lgb.Dataset(X_val, y_val, reference=lgb_train, categorical_feature=categorical_features)
    
    model = lgb.train(
        params,
        lgb_train,
        valid_sets=[lgb_train, lgb_eval],
        verbose_eval=10,
        num_boost_round=1000,
        early_stopping_rounds=10
    )
    
    y_pred_val = model.predict(X_val, num_iteration=model.best_iteration)
    score = log_loss(y_val, y_pred_val)
    return score

In [11]:
study = optuna.create_study(sampler=optuna.samplers.RandomSampler(seed=0))
study.optimize(objective, n_trials=40)

[32m[I 2023-10-26 05:31:35,609][0m A new study created in memory with name: no-name-a68a2d98-47a6-44b7-b92e-befef71308a5[0m


[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7


[32m[I 2023-10-26 05:31:35,794][0m Trial 0 finished with value: 0.42847945081981315 and parameters: {'learning_rate': 0.07744067519636624, 'num_leaves': 96, 'max_depth': 6, 'colsample_bytree': 0.9002809661679648, 'subsample': 0.8965381085744438}. Best is trial 0 with value: 0.42847945081981315.[0m


[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.467099	valid_1's binary_logloss: 0.50022
[20]	training's binary_logloss: 0.39408	valid_1's binary_logloss: 0.454471
[30]	training's binary_logloss: 0.357053	valid_1's binary_logloss: 0.43721
[40]	training's binary_logloss: 0.334249	valid_1's binary_logloss: 0.430004
[50]	training's binary_logloss: 0.309135	valid_1's binary_logloss: 0.431825
Early stopping, best iteration is:
[43]	training's binary_logloss: 0.323521	valid_1's binary_logloss: 0.428479
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[Ligh

[32m[I 2023-10-26 05:31:35,923][0m Trial 1 finished with value: 0.4421269649218983 and parameters: {'learning_rate': 0.08117818483929862, 'num_leaves': 68, 'max_depth': 9, 'colsample_bytree': 0.6198495420611051, 'subsample': 0.6954297031030396}. Best is trial 0 with value: 0.42847945081981315.[0m
[32m[I 2023-10-26 05:31:36,053][0m Trial 2 finished with value: 0.4245061295610646 and parameters: {'learning_rate': 0.0738832558660675, 'num_leaves': 71, 'max_depth': 9, 'colsample_bytree': 0.7988155963828762, 'subsample': 0.9239588234024313}. Best is trial 2 with value: 0.4245061295610646.[0m


[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.469752	valid_1's binary_logloss: 0.506508
[20]	training's binary_logloss: 0.391385	valid_1's binary_logloss: 0.456927
[30]	training's binary_logloss: 0.350606	valid_1's binary_logloss: 0.438077
[40]	training's binary_logloss: 0.321516	valid_1's binary_logloss: 0.42927
[50]	training's binary_logloss: 0.290954	valid_1's binary_logloss: 0.426705
Early stopping, best iteration is:
[47]	training's binary_logloss: 0.301328	valid_1's binary_logloss: 0.424506




[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.515516	valid_1's binary_logloss: 0.541545
[20]	training's binary_logloss: 0.450117	valid_1's binary_logloss: 0.494036
[30]	training's binary_logloss: 0.404313	valid_1's binary_logloss: 0.461759

[32m[I 2023-10-26 05:31:36,244][0m Trial 3 finished with value: 0.43631485180562585 and parameters: {'learning_rate': 0.05355180290989435, 'num_leaves': 41, 'max_depth': 7, 'colsample_bytree': 0.607076439104114, 'subsample': 0.8914169459417782}. Best is trial 2 with value: 0.4245061295610646.[0m



[40]	training's binary_logloss: 0.378261	valid_1's binary_logloss: 0.452552
[50]	training's binary_logloss: 0.353605	valid_1's binary_logloss: 0.445769
[60]	training's binary_logloss: 0.336389	valid_1's binary_logloss: 0.439855
[70]	training's binary_logloss: 0.323061	valid_1's binary_logloss: 0.438611
[80]	training's binary_logloss: 0.311414	valid_1's binary_logloss: 0.436495
[90]	training's binary_logloss: 0.300569	valid_1's binary_logloss: 0.436749
[100]	training's binary_logloss: 0.289996	valid_1's binary_logloss: 0.437321
Early stopping, best iteration is:
[91]	training's binary_logloss: 0.299548	valid_1's binary_logloss: 0.436315
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start tra

[32m[I 2023-10-26 05:31:36,352][0m Trial 4 finished with value: 0.4249307320559762 and parameters: {'learning_rate': 0.08890783754749253, 'num_leaves': 79, 'max_depth': 3, 'colsample_bytree': 0.9425164197814674, 'subsample': 0.8797054974758531}. Best is trial 2 with value: 0.4245061295610646.[0m
[32m[I 2023-10-26 05:31:36,479][0m Trial 5 finished with value: 0.4376923392327464 and parameters: {'learning_rate': 0.0730739681126466, 'num_leaves': 51, 'max_depth': 6, 'colsample_bytree': 0.6413960490541266, 'subsample': 0.8239723574646333}. Best is trial 2 with value: 0.4245061295610646.[0m


[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.483823	valid_1's binary_logloss: 0.518292
[20]	training's binary_logloss: 0.419735	valid_1's binary_logloss: 0.473653
[30]	training's binary_logloss: 0.378598	valid_1's binary_logloss: 0.448082
[40]	training's binary_logloss: 0.357672	valid_1's binary_logloss: 0.445179
[50]	training's binary_logloss: 0.337258	valid_1's binary_logloss: 0.43882
[60]	training's binary_logloss: 0.318978	valid_1's binary_logloss: 0.43876
Early stopping, best iteration is:
[51]



[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.508012	valid_1's binary_logloss: 0.530771
[20]	training's binary_logloss: 0.436255	valid_1's binary_logloss: 0.478116
[30]	training's binary_logloss: 0.396328	valid_1's binary_logloss: 0.45434
[40]	training's binary_logloss: 0.371255	valid_1's binary_logloss: 0.445644


[32m[I 2023-10-26 05:31:36,642][0m Trial 6 finished with value: 0.43545190058146804 and parameters: {'learning_rate': 0.05716766437045232, 'num_leaves': 64, 'max_depth': 5, 'colsample_bytree': 0.7451316789966832, 'subsample': 0.6925944642366194}. Best is trial 2 with value: 0.4245061295610646.[0m


[50]	training's binary_logloss: 0.35414	valid_1's binary_logloss: 0.440798
[60]	training's binary_logloss: 0.339299	valid_1's binary_logloss: 0.437352
[70]	training's binary_logloss: 0.327335	valid_1's binary_logloss: 0.437089
[80]	training's binary_logloss: 0.318158	valid_1's binary_logloss: 0.436231
Early stopping, best iteration is:
[76]	training's binary_logloss: 0.321897	valid_1's binary_logloss: 0.435452
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.450712	valid_1's binary_logloss: 0.489702


[32m[I 2023-10-26 05:31:36,761][0m Trial 7 finished with value: 0.4371350059196376 and parameters: {'learning_rate': 0.08871168447171084, 'num_leaves': 87, 'max_depth': 7, 'colsample_bytree': 0.7989518821040269, 'subsample': 0.6065764301527243}. Best is trial 2 with value: 0.4245061295610646.[0m


[20]	training's binary_logloss: 0.375827	valid_1's binary_logloss: 0.448375
[30]	training's binary_logloss: 0.337804	valid_1's binary_logloss: 0.43746
[40]	training's binary_logloss: 0.30798	valid_1's binary_logloss: 0.440336
Early stopping, best iteration is:
[33]	training's binary_logloss: 0.328659	valid_1's binary_logloss: 0.437135


[32m[I 2023-10-26 05:31:36,847][0m Trial 8 finished with value: 0.43315841800490634 and parameters: {'learning_rate': 0.08088177485379386, 'num_leaves': 70, 'max_depth': 3, 'colsample_bytree': 0.8159268989061649, 'subsample': 0.9303118274801184}. Best is trial 2 with value: 0.4245061295610646.[0m


[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.487391	valid_1's binary_logloss: 0.50398
[20]	training's binary_logloss: 0.429415	valid_1's binary_logloss: 0.451457
[30]	training's binary_logloss: 0.403114	valid_1's binary_logloss: 0.437002
[40]	training's binary_logloss: 0.389383	valid_1's binary_logloss: 0.436694
Early stopping, best iteration is:
[33]	training's binary_logloss: 0.398731	valid_1's binary_logloss: 0.433158
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set 

[32m[I 2023-10-26 05:31:36,947][0m Trial 9 finished with value: 0.43280569884407294 and parameters: {'learning_rate': 0.08409101495517418, 'num_leaves': 90, 'max_depth': 3, 'colsample_bytree': 0.9158220041108944, 'subsample': 0.6347481226256408}. Best is trial 2 with value: 0.4245061295610646.[0m


No further splits with positive gain, best gain: -inf
[20]	training's binary_logloss: 0.426993	valid_1's binary_logloss: 0.450367
[30]	training's binary_logloss: 0.402595	valid_1's binary_logloss: 0.435043
[40]	training's binary_logloss: 0.38705	valid_1's binary_logloss: 0.434686
[50]	training's binary_logloss: 0.371831	valid_1's binary_logloss: 0.435278
Early stopping, best iteration is:
[41]	training's binary_logloss: 0.386109	valid_1's binary_logloss: 0.432806
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7


[32m[I 2023-10-26 05:31:37,068][0m Trial 10 finished with value: 0.4379766273204317 and parameters: {'learning_rate': 0.09849045338733745, 'num_leaves': 43, 'max_depth': 9, 'colsample_bytree': 0.7253532584393337, 'subsample': 0.8627401494264597}. Best is trial 2 with value: 0.4245061295610646.[0m


[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.443209	valid_1's binary_logloss: 0.491043
[20]	training's binary_logloss: 0.363908	valid_1's binary_logloss: 0.450962
[30]	training's binary_logloss: 0.325834	valid_1's binary_logloss: 0.440891
[40]	training's binary_logloss: 0.299768	valid_1's binary_logloss: 0.441593
Early stopping, best iteration is:
[35]	training's binary_logloss: 0.312572	valid_1's binary_logloss: 0.437977
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7




[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.483989	valid_1's binary_logloss: 0.509556
[20]	training's binary_logloss: 0.424326	valid_1's binary_logloss: 0.465367
[30]	training's binary_logloss: 0.388791	valid_1's binary_logloss: 0.44312
[40]	training's binary_logloss: 0.369941	valid_1's binary_logloss: 0.436726
[50]	training's binary_logloss: 0.35437	valid_1's binary_logloss: 0.433986
[60]	training's binary_logloss: 0.342604	valid_1's binary_logloss: 0.433773
Early stopping, best iteration is:
[54]	training's binary_logloss: 0.34815	valid_1's binary_logloss: 0.432134


[32m[I 2023-10-26 05:31:37,198][0m Trial 11 finished with value: 0.4321340286515928 and parameters: {'learning_rate': 0.08039153343577339, 'num_leaves': 44, 'max_depth': 4, 'colsample_bytree': 0.6134488992654571, 'subsample': 0.8219959202850673}. Best is trial 2 with value: 0.4245061295610646.[0m


[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.443942	valid_1's binary_logloss: 0.490507
[20]	training's binary_logloss: 0.367422	valid_1's binary_logloss: 0.451301
[30]	training's binary_logloss: 0.330664	valid_1's binary_logloss: 0.439196
[40]	training's binary_logloss: 0.306195	valid_1's binary_logloss: 0.438965
Early stopping, best iteration is:
[35]	training's binary_logloss: 0.319065	valid_1's binary_logloss: 0.437686


[32m[I 2023-10-26 05:31:37,317][0m Trial 12 finished with value: 0.43768637219375234 and parameters: {'learning_rate': 0.09794746343122603, 'num_leaves': 79, 'max_depth': 7, 'colsample_bytree': 0.6564583312597486, 'subsample': 0.8285879139128894}. Best is trial 2 with value: 0.4245061295610646.[0m


[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.494045	valid_1's binary_logloss: 0.523695
[20]	training's binary_logloss: 0.41769	valid_1's binary_logloss: 0.472183
[30]	training's binary_logloss: 0.375966	valid_1's binary_logloss: 0.445081
[40]	training's binary_logloss: 0.349085	valid_1's binary_logloss: 0.440121

[32m[I 2023-10-26 05:31:37,459][0m Trial 13 finished with value: 0.4361457569515779 and parameters: {'learning_rate': 0.06266458012698911, 'num_leaves': 47, 'max_depth': 7, 'colsample_bytree': 0.685548957200561, 'subsample': 0.6556393542759319}. Best is trial 2 with value: 0.4245061295610646.[0m


No further splits with positive gain, best gain: -inf
[50]	training's binary_logloss: 0.327885	valid_1's binary_logloss: 0.438422
[60]	training's binary_logloss: 0.308904	valid_1's binary_logloss: 0.436769
Early stopping, best iteration is:
[53]	training's binary_logloss: 0.321132	valid_1's binary_logloss: 0.436146
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.525628	valid_1's binary_logloss: 0.537974
[20]	training's binary_logloss: 0.464929	valid_1's binary_logloss: 0.486399
[30]	training's binary_l

[32m[I 2023-10-26 05:31:37,580][0m Trial 14 finished with value: 0.4323778996639876 and parameters: {'learning_rate': 0.05551875705821526, 'num_leaves': 117, 'max_depth': 3, 'colsample_bytree': 0.6483640329720148, 'subsample': 0.6688038265880187}. Best is trial 2 with value: 0.4245061295610646.[0m


No further splits with positive gain, best gain: -inf
[50]	training's binary_logloss: 0.399503	valid_1's binary_logloss: 0.437924
[60]	training's binary_logloss: 0.389054	valid_1's binary_logloss: 0.435114
[70]	training's binary_logloss: 0.379018	valid_1's binary_logloss: 0.432895
Early stopping, best iteration is:
[67]	training's binary_logloss: 0.382129	valid_1's binary_logloss: 0.432378
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds

[32m[I 2023-10-26 05:31:37,724][0m Trial 15 finished with value: 0.4368126381734662 and parameters: {'learning_rate': 0.06843625853304822, 'num_leaves': 101, 'max_depth': 9, 'colsample_bytree': 0.6339854465275714, 'subsample': 0.8932807176245814}. Best is trial 2 with value: 0.4245061295610646.[0m


No further splits with positive gain, best gain: -inf
[10]	training's binary_logloss: 0.48918	valid_1's binary_logloss: 0.522752
[20]	training's binary_logloss: 0.420632	valid_1's binary_logloss: 0.476453
[30]	training's binary_logloss: 0.377787	valid_1's binary_logloss: 0.448607
[40]	training's binary_logloss: 0.352107	valid_1's binary_logloss: 0.444429
[50]	training's binary_logloss: 0.328366	valid_1's binary_logloss: 0.439744
[60]	training's binary_logloss: 0.308733	valid_1's binary_logloss: 0.441378
Early stopping, best iteration is:
[53]	training's binary_logloss: 0.321648	valid_1's binary_logloss: 0.436813




[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.509388	valid_1's binary_logloss: 0.533749
[20]	training's binary_logloss: 0.435493	valid_1's binary_logloss: 0.481416
[30]	training's binary_logloss: 0.393506	valid_1's binary_logloss: 0.454831

[32m[I 2023-10-26 05:31:37,912][0m Trial 16 finished with value: 0.438137212130941 and parameters: {'learning_rate': 0.054804920394698156, 'num_leaves': 125, 'max_depth': 6, 'colsample_bytree': 0.7640279205766956, 'subsample': 0.9418663808666179}. Best is trial 2 with value: 0.4245061295610646.[0m


No further splits with positive gain, best gain: -inf
[40]	training's binary_logloss: 0.367112	valid_1's binary_logloss: 0.450127
[50]	training's binary_logloss: 0.347589	valid_1's binary_logloss: 0.444476
[60]	training's binary_logloss: 0.33086	valid_1's binary_logloss: 0.4413
[70]	training's binary_logloss: 0.317434	valid_1's binary_logloss: 0.439572
[80]	training's binary_logloss: 0.308209	valid_1's binary_logloss: 0.438763
[90]	training's binary_logloss: 0.295687	valid_1's binary_logloss: 0.44044
Early stopping, best iteration is:
[83]	training's binary_logloss: 0.304139	valid_1's binary_logloss: 0.438137
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-

[32m[I 2023-10-26 05:31:38,030][0m Trial 17 finished with value: 0.4321300743796833 and parameters: {'learning_rate': 0.08024227598725231, 'num_leaves': 105, 'max_depth': 3, 'colsample_bytree': 0.7545988224370959, 'subsample': 0.942855355084455}. Best is trial 2 with value: 0.4245061295610646.[0m
[32m[I 2023-10-26 05:31:38,155][0m Trial 18 finished with value: 0.42229695758103547 and parameters: {'learning_rate': 0.06797222319846608, 'num_leaves': 91, 'max_depth': 5, 'colsample_bytree': 0.8410314139820196, 'subsample': 0.9081665612384084}. Best is trial 18 with value: 0.42229695758103547.[0m


[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.485525	valid_1's binary_logloss: 0.509484
[20]	training's binary_logloss: 0.415374	valid_1's binary_logloss: 0.461351
[30]	training's binary_logloss: 0.378855	valid_1's binary_logloss: 0.439165
[40]	training's binary_logloss: 0.357089	valid_1's binary_logloss: 0.431688
[50]	training's binary_logloss: 0.335378	valid_1's binary_logloss: 0.422297
[60]	training's binary_logloss: 0.319352	valid_1's binary_logloss: 0.424844
Early stopping, best iteration is:
[5



[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.448831	valid_1's binary_logloss: 0.486462
[20]	training's binary_logloss: 0.382136	valid_1's binary_logloss: 0.446764
[30]	training's binary_logloss: 0.349725	valid_1's binary_logloss: 0.436877
[40]	training's binary_logloss: 0.325297	valid_1's binary_logloss: 0.436176
Early stopping, best iteration is:
[35]	training's binary_logloss: 0.33593	valid_1's binary_logloss: 0.43326


[32m[I 2023-10-26 05:31:38,269][0m Trial 19 finished with value: 0.4332600804126494 and parameters: {'learning_rate': 0.09591177331810724, 'num_leaves': 99, 'max_depth': 5, 'colsample_bytree': 0.7978161033117064, 'subsample': 0.9027858964568847}. Best is trial 18 with value: 0.42229695758103547.[0m


[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.482287	valid_1's binary_logloss: 0.499835
[20]	training's binary_logloss: 0.416429	valid_1's binary_logloss: 0.448268
[30]	training's binary_logloss: 0.38527	valid_1's binary_logloss: 0.432467
[40]	training's binary_logloss: 0.36465	valid_1's binary_logloss: 0.428241
[50]	training's binary_logloss: 0.347536	valid_1's binary_logloss: 0.424235
[60]	training's binary_logloss: 0.335452	valid_1's binary_logloss: 0.423443

[32m[I 2023-10-26 05:31:38,405][0m Trial 20 finished with value: 0.42290722593726743 and parameters: {'learning_rate': 0.07544844803335073, 'num_leaves': 46, 'max_depth': 4, 'colsample_bytree': 0.9224051635830199, 'subsample': 0.6290893724207108}. Best is trial 18 with value: 0.42229695758103547.[0m


No further splits with positive gain, best gain: -inf
Early stopping, best iteration is:
[55]	training's binary_logloss: 0.342255	valid_1's binary_logloss: 0.422907
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.496646	valid_1's binary_logloss: 0.521816
[20]	training's binary_logloss: 0.424976	valid_1's binary_logloss: 0.471407
[30]	training's binary_logloss: 0.387005	valid_1's binary_logloss: 0.445281
[40]	training's binary_logloss: 0.363932	valid_1's binary_logloss: 0.437283

[32m[I 2023-10-26 05:31:38,538][0m Trial 21 finished with value: 0.4315119368111279 and parameters: {'learning_rate': 0.06388592806405163, 'num_leaves': 43, 'max_depth': 5, 'colsample_bytree': 0.6461292518415372, 'subsample': 0.850714521441498}. Best is trial 18 with value: 0.42229695758103547.[0m


No further splits with positive gain, best gain: -inf
[50]	training's binary_logloss: 0.346096	valid_1's binary_logloss: 0.433527
[60]	training's binary_logloss: 0.330292	valid_1's binary_logloss: 0.432741
Early stopping, best iteration is:
[53]	training's binary_logloss: 0.340831	valid_1's binary_logloss: 0.431512
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds

[32m[I 2023-10-26 05:31:38,670][0m Trial 22 finished with value: 0.4382329583140027 and parameters: {'learning_rate': 0.06447030464736006, 'num_leaves': 51, 'max_depth': 9, 'colsample_bytree': 0.8052795271835291, 'subsample': 0.6070376411656228}. Best is trial 18 with value: 0.42229695758103547.[0m



[10]	training's binary_logloss: 0.484843	valid_1's binary_logloss: 0.5163
[20]	training's binary_logloss: 0.405831	valid_1's binary_logloss: 0.464199
[30]	training's binary_logloss: 0.362519	valid_1's binary_logloss: 0.444226
[40]	training's binary_logloss: 0.335605	valid_1's binary_logloss: 0.438309
[50]	training's binary_logloss: 0.309135	valid_1's binary_logloss: 0.439179
Early stopping, best iteration is:
[41]	training's binary_logloss: 0.333105	valid_1's binary_logloss: 0.438233
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7


[32m[I 2023-10-26 05:31:38,763][0m Trial 23 finished with value: 0.4319453390007526 and parameters: {'learning_rate': 0.09144700146086816, 'num_leaves': 62, 'max_depth': 3, 'colsample_bytree': 0.8372357878786805, 'subsample': 0.6945027906172576}. Best is trial 18 with value: 0.42229695758103547.[0m


[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.476688	valid_1's binary_logloss: 0.493763
[20]	training's binary_logloss: 0.421109	valid_1's binary_logloss: 0.447736
[30]	training's binary_logloss: 0.398688	valid_1's binary_logloss: 0.433285
[40]	training's binary_logloss: 0.384663	valid_1's binary_logloss: 0.435355
Early stopping, best iteration is:
[34]	training's binary_logloss: 0.394263	valid_1's binary_logloss: 0.431945


[32m[I 2023-10-26 05:31:38,871][0m Trial 24 finished with value: 0.43280527138296226 and parameters: {'learning_rate': 0.08675970110612974, 'num_leaves': 45, 'max_depth': 3, 'colsample_bytree': 0.6870636002319853, 'subsample': 0.8016550670462429}. Best is trial 18 with value: 0.42229695758103547.[0m


[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.484363	valid_1's binary_logloss: 0.503019
[20]	training's binary_logloss: 0.428252	valid_1's binary_logloss: 0.455871
[30]	training's binary_logloss: 0.400903	valid_1's binary_logloss: 0.43804
[40]	training's binary_logloss: 0.387638	valid_1's binary_logloss: 0.434195
[50]	training's binary_logloss: 0.377252	valid_1's binary_logloss: 0.434891
Early stopping, best iteration is:
[47]	training's binary_logloss: 0.380795	valid_1's binary_logloss: 0.432805
[Li

[32m[I 2023-10-26 05:31:38,975][0m Trial 25 finished with value: 0.43264815010648444 and parameters: {'learning_rate': 0.07960209656359196, 'num_leaves': 98, 'max_depth': 3, 'colsample_bytree': 0.6780785714242163, 'subsample': 0.9334621540309447}. Best is trial 18 with value: 0.42229695758103547.[0m


[binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.492726	valid_1's binary_logloss: 0.509797
[20]	training's binary_logloss: 0.435637	valid_1's binary_logloss: 0.460714
[30]	training's binary_logloss: 0.406817	valid_1's binary_logloss: 0.441265
[40]	training's binary_logloss: 0.390071	valid_1's binary_logloss: 0.435164
[50]	training's binary_logloss: 0.379723	valid_1's binary_logloss: 0.432848
Early stopping, best iteration is:
[48]	training's binary_logloss: 0.382185	valid_1's binary_logloss: 0.432648


[32m[I 2023-10-26 05:31:39,104][0m Trial 26 finished with value: 0.43188661123953703 and parameters: {'learning_rate': 0.07235626893088137, 'num_leaves': 36, 'max_depth': 6, 'colsample_bytree': 0.8448177463611265, 'subsample': 0.7041029327992967}. Best is trial 18 with value: 0.42229695758103547.[0m


[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.474549	valid_1's binary_logloss: 0.505582
[20]	training's binary_logloss: 0.401924	valid_1's binary_logloss: 0.459378
[30]	training's binary_logloss: 0.364033	valid_1's binary_logloss: 0.44044
[40]	training's binary_logloss: 0.340511	valid_1's binary_logloss: 0.431887
[50]	training's binary_logloss: 0.317691	valid_1's binary_logloss: 0.434013
Early stopping, best iteration is:
[40]	training's binary_logloss: 0.340511	valid_1's binary_logloss: 0.431887




[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.480316	valid_1's binary_logloss: 0.499101
[20]	training's binary_logloss: 0.423103	valid_1's binary_logloss: 0.452992
[30]	training's binary_logloss: 0.397468	valid_1's binary_logloss: 0.437187
[40]	training's binary_logloss: 0.382698	valid_1's binary_logloss: 0.436572
Early stopping, best iteration is:
[37]	training's binary_logloss: 0.387039	valid_1's binary_logloss: 0.435217


[32m[I 2023-10-26 05:31:39,205][0m Trial 27 finished with value: 0.4352169190056494 and parameters: {'learning_rate': 0.09068989098512387, 'num_leaves': 107, 'max_depth': 3, 'colsample_bytree': 0.6874286964792423, 'subsample': 0.7096264161810828}. Best is trial 18 with value: 0.42229695758103547.[0m


[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.439373	valid_1's binary_logloss: 0.481591
[20]	training's binary_logloss: 0.365097	valid_1's binary_logloss: 0.442936


[32m[I 2023-10-26 05:31:39,916][0m Trial 28 finished with value: 0.4360847200744944 and parameters: {'learning_rate': 0.09827081102599636, 'num_leaves': 112, 'max_depth': 7, 'colsample_bytree': 0.8308839442242945, 'subsample': 0.7866221888708269}. Best is trial 18 with value: 0.42229695758103547.[0m


[30]	training's binary_logloss: 0.32867	valid_1's binary_logloss: 0.436085
[40]	training's binary_logloss: 0.300282	valid_1's binary_logloss: 0.437151
Early stopping, best iteration is:
[30]	training's binary_logloss: 0.32867	valid_1's binary_logloss: 0.436085




[LightGBM] [Info] Number of positive: 239, number of negative: 384


[32m[I 2023-10-26 05:31:40,655][0m Trial 29 finished with value: 0.4354132293377502 and parameters: {'learning_rate': 0.0615266511753165, 'num_leaves': 64, 'max_depth': 7, 'colsample_bytree': 0.8122376249447735, 'subsample': 0.6067176194082667}. Best is trial 18 with value: 0.42229695758103547.[0m


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.490861	valid_1's binary_logloss: 0.518046
[20]	training's binary_logloss: 0.414075	valid_1's binary_logloss: 0.46587
[30]	training's binary_logloss: 0.373059	valid_1's binary_logloss: 0.44467
[40]	training's binary_logloss: 0.34652	valid_1's binary_logloss: 0.437817
[50]	training's binary_logloss: 0.323642	valid_1's binary_logloss: 0.437001
[60]	training's binary_logloss: 0.305344	valid_1's binary_logloss: 0.436299
Early stopping, best iteration is:
[56]	training's binary_logloss: 0.311264	valid_1's binary_logloss: 0.435

[32m[I 2023-10-26 05:31:40,772][0m Trial 30 finished with value: 0.43237962465824226 and parameters: {'learning_rate': 0.06507874083372747, 'num_leaves': 45, 'max_depth': 3, 'colsample_bytree': 0.7342117787622318, 'subsample': 0.8622094430034536}. Best is trial 18 with value: 0.42229695758103547.[0m
[32m[I 2023-10-26 05:31:40,888][0m Trial 31 finished with value: 0.44091072081294846 and parameters: {'learning_rate': 0.0939226095138021, 'num_leaves': 111, 'max_depth': 6, 'colsample_bytree': 0.6323308613113143, 'subsample': 0.7239163317293336}. Best is trial 18 with value: 0.42229695758103547.[0m


No further splits with positive gain, best gain: -inf
[40]	training's binary_logloss: 0.403482	valid_1's binary_logloss: 0.441182
[50]	training's binary_logloss: 0.391237	valid_1's binary_logloss: 0.434578
[60]	training's binary_logloss: 0.379772	valid_1's binary_logloss: 0.433108
Early stopping, best iteration is:
[58]	training's binary_logloss: 0.382328	valid_1's binary_logloss: 0.43238
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.45707	valid_1's binary_logloss: 0.498243
[20]	training's binary_log

[32m[I 2023-10-26 05:31:40,991][0m Trial 32 finished with value: 0.4324379288313208 and parameters: {'learning_rate': 0.07759081295666295, 'num_leaves': 76, 'max_depth': 3, 'colsample_bytree': 0.828236144500591, 'subsample': 0.7509964524018908}. Best is trial 18 with value: 0.42229695758103547.[0m
[32m[I 2023-10-26 05:31:41,110][0m Trial 33 finished with value: 0.43192228927859533 and parameters: {'learning_rate': 0.09482732979255315, 'num_leaves': 108, 'max_depth': 9, 'colsample_bytree': 0.9444147844774512, 'subsample': 0.6911193246555916}. Best is trial 18 with value: 0.42229695758103547.[0m


[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.491264	valid_1's binary_logloss: 0.507168
[20]	training's binary_logloss: 0.432674	valid_1's binary_logloss: 0.45438
[30]	training's binary_logloss: 0.406103	valid_1's binary_logloss: 0.439205
[40]	training's binary_logloss: 0.392193	valid_1's binary_logloss: 0.432688
[50]	training's binary_logloss: 0.378493	valid_1's binary_logloss: 0.435125
Early stopping, best iteration is:
[41]	training's binary_logloss: 0.391205	valid_1's binary_logloss: 0.432438
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[Li

[32m[I 2023-10-26 05:31:41,224][0m Trial 34 finished with value: 0.43670445846878597 and parameters: {'learning_rate': 0.076851126068789, 'num_leaves': 75, 'max_depth': 3, 'colsample_bytree': 0.6348491811887839, 'subsample': 0.7233090814201866}. Best is trial 18 with value: 0.42229695758103547.[0m



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.498585	valid_1's binary_logloss: 0.516257
[20]	training's binary_logloss: 0.448396	valid_1's binary_logloss: 0.470302
[30]	training's binary_logloss: 0.416792	valid_1's binary_logloss: 0.445554
[40]	training's binary_logloss: 0.398575	valid_1's binary_logloss: 0.442421
[50]	training's binary_logloss: 0.385587	valid_1's binary_logloss: 0.44018
[60]	training's binary_logloss: 0.375039	valid_1's binary_logloss: 0.438373
Early stopping, best iteration is:
[54]	training's binary_logloss: 0.381155	valid_1's binary_logloss: 0.

[32m[I 2023-10-26 05:31:41,368][0m Trial 35 finished with value: 0.43897887215177783 and parameters: {'learning_rate': 0.07346245869951211, 'num_leaves': 69, 'max_depth': 8, 'colsample_bytree': 0.9166267104733297, 'subsample': 0.6131457843398491}. Best is trial 18 with value: 0.42229695758103547.[0m
[32m[I 2023-10-26 05:31:41,495][0m Trial 36 finished with value: 0.43602057365343355 and parameters: {'learning_rate': 0.07541577242894455, 'num_leaves': 121, 'max_depth': 8, 'colsample_bytree': 0.896802880262782, 'subsample': 0.8825616355537538}. Best is trial 18 with value: 0.42229695758103547.[0m


No further splits with positive gain, best gain: -inf
[50]	training's binary_logloss: 0.295309	valid_1's binary_logloss: 0.441224
Early stopping, best iteration is:
[47]	training's binary_logloss: 0.301704	valid_1's binary_logloss: 0.438979
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.467774	valid_1's binary_logloss: 0.503069
[20]	training's binary_logloss: 0.390126	valid_1's binary_logloss: 0.456883
[30]	training's binary_logloss: 0.350433	valid_1's binary_logloss: 0.438747
[40]	training's binary_l

[32m[I 2023-10-26 05:31:41,616][0m Trial 37 finished with value: 0.42043194243489046 and parameters: {'learning_rate': 0.07845503693072967, 'num_leaves': 67, 'max_depth': 4, 'colsample_bytree': 0.9439012359388177, 'subsample': 0.7306517627741391}. Best is trial 37 with value: 0.42043194243489046.[0m


[binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.469027	valid_1's binary_logloss: 0.484627
[20]	training's binary_logloss: 0.408849	valid_1's binary_logloss: 0.441896
[30]	training's binary_logloss: 0.380382	valid_1's binary_logloss: 0.433417
[40]	training's binary_logloss: 0.359447	valid_1's binary_logloss: 0.422395
[50]	training's binary_logloss: 0.340297	valid_1's binary_logloss: 0.421747
Early stopping, best iteration is:
[47]	training's binary_logloss: 0.34635	valid_1's binary_logloss: 0.420432
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [bi

[32m[I 2023-10-26 05:31:41,751][0m Trial 38 finished with value: 0.4380170550796625 and parameters: {'learning_rate': 0.07100376848953054, 'num_leaves': 82, 'max_depth': 7, 'colsample_bytree': 0.7279236371921936, 'subsample': 0.6058197908068536}. Best is trial 37 with value: 0.42043194243489046.[0m
[32m[I 2023-10-26 05:31:41,904][0m Trial 39 finished with value: 0.4329864959835051 and parameters: {'learning_rate': 0.06153711677033979, 'num_leaves': 103, 'max_depth': 8, 'colsample_bytree': 0.8554966968484203, 'subsample': 0.6600703870415041}. Best is trial 37 with value: 0.42043194243489046.[0m


[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 191
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.49002	valid_1's binary_logloss: 0.518598
[20]	training's binary_logloss: 0.41204	valid_1's binary_logloss: 0.468053
[30]	training's binary_logloss: 0.369028	valid_1's binary_logloss: 0.448051
[40]	training's binary_logloss: 0.340991	valid_1's binary_logloss: 0.438875
[50]	training's binary_logloss: 0.320443	valid_1's binary_logloss: 0.436253
[60]	training's binary_logloss: 0.301397	valid_1's binary_logloss: 0.434511
Early stopping, best iteration is:
[58]

In [12]:
params = {
    'objective': 'binary'
}

for i, j in study.best_params.items():
    params[i] = j

params

{'objective': 'binary',
 'learning_rate': 0.07845503693072967,
 'num_leaves': 67,
 'max_depth': 4,
 'colsample_bytree': 0.9439012359388177,
 'subsample': 0.7306517627741391}

## Model

In [13]:
scores = []
y_preds_test = []

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=456)

for tr_idx, val_idx in kf.split(X_train, y_train):
    X_tr, X_val = X_train.iloc[tr_idx], X_train.iloc[val_idx]
    y_tr, y_val = y_train.iloc[tr_idx], y_train.iloc[val_idx]
    
    lgb_train = lgb.Dataset(X_tr, y_tr, categorical_feature=categorical_features)
    lgb_eval = lgb.Dataset(X_val, y_val, reference=lgb_train, categorical_feature=categorical_features)
    
    model = lgb.train(
        params,
        lgb_train,
        valid_sets=[lgb_train, lgb_eval],
        verbose_eval=10,
        num_boost_round=1000,
        early_stopping_rounds=10
    )
    
    y_pred_val = model.predict(X_val, num_iteration=model.best_iteration)
    y_pred_val = (y_pred_val > 0.5).astype(int)
    score = f1_score(y_val, y_pred_val, average='macro')
    scores.append(score)
    
    y_pred_test = model.predict(X_test, num_iteration=model.best_iteration)
    y_preds_test.append(y_pred_test)

y_preds_test = np.mean(y_preds_test, axis=0)
y_preds_test = (y_preds_test > 0.5).astype(int)



[LightGBM] [Info] Number of positive: 273, number of negative: 439
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 207
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383427 -> initscore=-0.475028
[LightGBM] [Info] Start training from score -0.475028
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.474589	valid_1's binary_logloss: 0.471442
[20]	training's binary_logloss: 0.415452	valid_1's binary_logloss: 0.415623
[30]	training's binary_logloss: 0.387418	valid_1's binary_logloss: 0.3924
[40]	training's binary_logloss: 0.367716	valid_1's binary_logloss: 0.37639
[50]	training's binary_logloss: 0.348471	valid_1's binary_logloss: 0.364311
[60]	training's binary_logloss: 0.33319	valid_1's binary_logloss: 0.359127
[70]	training's binary_logloss: 0.322953	

In [14]:
print('===CV scores===')
print(scores)
print(np.mean(scores))

===CV scores===
[0.8688033049040511, 0.7682291666666667, 0.830360789652825, 0.7926260964912281, 0.7161084529505584]
0.7952255621330659


## Submit

In [15]:
submit = pd.DataFrame(y_preds_test, index=test['PassengerId'], columns=['Survived'])
submit.to_csv('submit.csv')