In [2]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.metrics import accuracy_score,classification_report,roc_curve,roc_auc_score,confusion_matrix,precision_recall_curve,precision_score,recall_score,f1_score
from sklearn.model_selection import train_test_split,cross_val_score,StratifiedKFold,cross_val_predict
from imblearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler,OneHotEncoder,FunctionTransformer
from imblearn.under_sampling import RandomUnderSampler,NearMiss,TomekLinks,InstanceHardnessThreshold
from sklearn.compose import ColumnTransformer
import optuna
from sklearn.metrics import average_precision_score
from sklearn.utils import shuffle
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from imblearn.combine import SMOTETomek
from imblearn.over_sampling import SMOTE,ADASYN
from sklearn.metrics import average_precision_score

In [3]:
df = pd.read_csv('fraud.csv')

In [4]:
df.shape

(6362620, 11)

In [5]:
df.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [6]:
df = df[~(df['nameDest'].str.startswith('M'))|(df['nameOrig'].str.startswith('M'))]

In [7]:
df.shape

(4211125, 11)

In [8]:
df.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
9,1,DEBIT,5337.77,C712410124,41720.0,36382.23,C195600860,41898.0,40348.79,0,0
10,1,DEBIT,9644.94,C1900366749,4465.0,0.0,C997608398,10845.0,157982.12,0,0
15,1,CASH_OUT,229133.94,C905080434,15325.0,0.0,C476402209,5083.0,51513.44,0,0


In [9]:
df.drop(columns=['nameOrig','nameDest'],inplace=True)

In [10]:
df.head(2)

Unnamed: 0,step,type,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
2,1,TRANSFER,181.0,181.0,0.0,0.0,0.0,1,0
3,1,CASH_OUT,181.0,181.0,0.0,21182.0,0.0,1,0


In [11]:
df['diffOrg'] = df['oldbalanceOrg'] - df['newbalanceOrig']
df['diffdest'] = df['oldbalanceDest'] - df['newbalanceDest']
df['isFraud'] = df['isFraud'].astype('int')
df.drop(columns=['oldbalanceOrg','newbalanceOrig','newbalanceDest','oldbalanceDest'],inplace=True)
hours = df['step'] % 24
bins = [0, 6, 12, 18, 24]
labels = ['Late Night', 'Morning', 'Afternoon', 'Night']

buckets =  pd.cut(
    hours,
    bins=bins,
    labels=labels,
    right=False  # 0-5, 6-11, 12-17, 18-23
)
df['transaction_time'] = buckets

In [12]:
df.drop(columns=['step'],inplace=True)

In [13]:
df.drop(columns=['isFlaggedFraud'],inplace=True)

In [14]:
df.head()

Unnamed: 0,type,amount,isFraud,diffOrg,diffdest,transaction_time
2,TRANSFER,181.0,1,181.0,0.0,Late Night
3,CASH_OUT,181.0,1,181.0,21182.0,Late Night
9,DEBIT,5337.77,0,5337.77,1549.21,Late Night
10,DEBIT,9644.94,0,4465.0,-147137.12,Late Night
15,CASH_OUT,229133.94,0,15325.0,-46430.44,Late Night


In [15]:
# Create train , test , validation Dataset
train,val = train_test_split(df,stratify=df['isFraud'],test_size=0.1,random_state=42)

In [16]:
train['isFraud'].value_counts()/train.shape[0]

isFraud
0    0.99805
1    0.00195
Name: count, dtype: float64

In [17]:
val['isFraud'].value_counts()/val.shape[0]

isFraud
0    0.99805
1    0.00195
Name: count, dtype: float64

In [17]:
# Save Validation data
#val.to_csv('validation_set.csv',index=False)

In [18]:
# using 25% random non_fraud data as for sample
fraud_df = train[train['isFraud']==1]
non_fraud_df = train[train['isFraud']==False].sample(int(len(train) * 0.50),random_state=42)
sample_df = pd.concat([fraud_df, non_fraud_df])
sampel_df = sample_df.sample(len(sample_df))
sample_df.shape

(1902398, 6)

In [19]:
x,y = sample_df.drop(columns=['isFraud']),sample_df['isFraud']
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

In [20]:
# log transformer
log_transformer = FunctionTransformer(
    lambda x: np.log1p(np.abs(x)),
    feature_names_out="one-to-one"
)

In [21]:
# preprocessor
preprocessor = ColumnTransformer(transformers=[
                                              ('num',log_transformer,['amount','diffOrg','diffdest']),
                                              ('cat',OneHotEncoder(),['type'])
                                              ])


In [22]:
# create model
neg = (y_train == 0).sum()
pos = (y_train == 1).sum()

# scale_pos_weight = neg / pos
sampler = SMOTE(sampling_strategy=0.2, random_state=42)
lgbm = LGBMClassifier(random_state=42)
pipeline = Pipeline([
      ('preprocessing',preprocessor),
      ('sampling',sampler),
      ('model',lgbm)
])

In [23]:
pipeline.fit(x_train,y_train)
y_pred = pipeline.predict(x_test)
accuracy_score(y_test,y_pred)

[LightGBM] [Info] Number of positive: 303195, number of negative: 1515976
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009457 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 823
[LightGBM] [Info] Number of data points in the train set: 1819171, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




0.9885670731707317

In [25]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       1.00      0.99      0.99    379030
           1       0.23      0.84      0.36      1450

    accuracy                           0.99    380480
   macro avg       0.61      0.91      0.68    380480
weighted avg       1.00      0.99      0.99    380480



In [26]:
print(confusion_matrix(y_test,y_pred))

[[374911   4119]
 [   231   1219]]


In [28]:
y_prob = pipeline.predict_proba(x_test)[:,1]

precision,recall,pr_thresold = precision_recall_curve(y_test,y_prob)



In [29]:
f1_scores = 2 * (precision * recall) / (precision + recall + 1e-12)

In [30]:
best_idx = np.nanargmax(f1_scores[:-1]) # exclude last element as it corresponds to threshold=none
best_threshold = pr_thresold[best_idx]
best_precision = precision[best_idx]
best_recall = recall[best_idx]
best_f1 = f1_scores[best_idx]
print(f"Best threshold by F1: {best_threshold:.4f}")
print(f"Precision={best_precision:.4f}, Recall={best_recall:.4f}, F1={best_f1:.4f}")

Best threshold by F1: 0.9812
Precision=0.9684, Recall=0.7186, F1=0.8250


# Tunning

In [38]:
def objective(trial):
  # Define Parameters
  params = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2),
        'num_leaves': trial.suggest_int('num_leaves', 20, 120),
        'max_depth': trial.suggest_int('max_depth', -1, 15),
        'min_child_samples':  trial.suggest_int('min_child_samples', 10, 100),
        'subsample': trial.suggest_float('subsample', 0.1, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.1, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 5.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 5.0),
        'n_estimators': trial.suggest_int('n_estimators',100,1000,step=100),
        'n_jobs': -1,
        'random_state': 42
        
    }

    

  log_transformer = FunctionTransformer(
    lambda x: np.log1p(np.abs(x)),
    feature_names_out="one-to-one"
    )

  # Column transformer
  preprocessor = ColumnTransformer(transformers=[
                                              ('num',log_transformer,['amount','diffOrg','diffdest']),
                                              ('cat',OneHotEncoder(),['type','transaction_time'])
                                              ])


  # create model
  lgbm = LGBMClassifier(**params)
  pipeline = Pipeline([
      ('preprocessing',preprocessor),
      ('sampling',SMOTE(sampling_strategy=0.2, random_state=42)),
      ('model',lgbm)
  ])

  cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

  y_pred_proba = cross_val_predict(pipeline,x_train,y_train,cv=cv,method='predict_proba')[:,1]
   
  # Find best threshold INSIDE CV
  precision, recall, thresholds = precision_recall_curve(y_train, y_pred_proba)
  f1_scores = 2 * precision * recall / (precision + recall + 1e-12)

  best_f1 = np.max(f1_scores[:-1])  # ignore last threshold
  return best_f1


In [None]:
study = optuna.create_study(direction='maximize')
study.optimize(objective,n_trials=30,show_progress_bar=False)

[I 2026-01-04 12:09:07,865] A new study created in memory with name: no-name-1abbb0f0-c1a5-4cc5-8cd4-243037045e7b


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.040651 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.041743 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.045390 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:11:19,392] Trial 0 finished with value: 0.8433442316669512 and parameters: {'learning_rate': 0.11597459560096018, 'num_leaves': 23, 'max_depth': 8, 'min_child_samples': 86, 'subsample': 0.14799357467826196, 'colsample_bytree': 0.353204945231265, 'reg_alpha': 0.33324295554456707, 'reg_lambda': 3.6292614293392194, 'n_estimators': 900}. Best is trial 0 with value: 0.8433442316669512.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.051548 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.066899 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.054992 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:13:00,762] Trial 1 finished with value: 0.8416814904966509 and parameters: {'learning_rate': 0.13728857621725746, 'num_leaves': 109, 'max_depth': 8, 'min_child_samples': 77, 'subsample': 0.8971582337871604, 'colsample_bytree': 0.7825925797147189, 'reg_alpha': 4.411243404190986, 'reg_lambda': 3.099139758361531, 'n_estimators': 500}. Best is trial 0 with value: 0.8433442316669512.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.054521 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.049317 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.054778 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:13:26,417] Trial 2 finished with value: 0.8429076036433053 and parameters: {'learning_rate': 0.07792930579419573, 'num_leaves': 58, 'max_depth': 8, 'min_child_samples': 41, 'subsample': 0.9010510989553135, 'colsample_bytree': 0.7419042711799152, 'reg_alpha': 2.029854819196646, 'reg_lambda': 0.10153674815424563, 'n_estimators': 100}. Best is trial 0 with value: 0.8433442316669512.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.049665 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.063305 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.053560 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:14:27,835] Trial 3 finished with value: 0.8416494068908645 and parameters: {'learning_rate': 0.10681427610162142, 'num_leaves': 28, 'max_depth': 2, 'min_child_samples': 24, 'subsample': 0.13421040511986496, 'colsample_bytree': 0.9418112810795696, 'reg_alpha': 2.9712086505550546, 'reg_lambda': 3.716770463108778, 'n_estimators': 700}. Best is trial 0 with value: 0.8433442316669512.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.041466 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.066451 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.054114 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:16:23,962] Trial 4 finished with value: 0.8429953960344594 and parameters: {'learning_rate': 0.1770869594155951, 'num_leaves': 110, 'max_depth': 0, 'min_child_samples': 24, 'subsample': 0.6823046809647019, 'colsample_bytree': 0.6922321491595318, 'reg_alpha': 3.17998474071684, 'reg_lambda': 0.6236319391967093, 'n_estimators': 1000}. Best is trial 0 with value: 0.8433442316669512.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044813 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.049331 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.040855 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:18:10,044] Trial 5 finished with value: 0.8273548867972322 and parameters: {'learning_rate': 0.020375244888809728, 'num_leaves': 68, 'max_depth': 7, 'min_child_samples': 50, 'subsample': 0.4514066475374483, 'colsample_bytree': 0.3552658045400271, 'reg_alpha': 4.806176385805852, 'reg_lambda': 1.016478605683517, 'n_estimators': 600}. Best is trial 0 with value: 0.8433442316669512.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.051679 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.049052 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.041334 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:24:38,547] Trial 6 finished with value: 0.8387930238991079 and parameters: {'learning_rate': 0.03491636025623496, 'num_leaves': 48, 'max_depth': 9, 'min_child_samples': 99, 'subsample': 0.1548151621438907, 'colsample_bytree': 0.3580398286392983, 'reg_alpha': 1.8048405011323516, 'reg_lambda': 1.873313847308804, 'n_estimators': 600}. Best is trial 0 with value: 0.8433442316669512.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.049147 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.048125 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.055347 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:25:19,724] Trial 7 finished with value: 0.843454664545216 and parameters: {'learning_rate': 0.10068156352875068, 'num_leaves': 49, 'max_depth': 0, 'min_child_samples': 47, 'subsample': 0.8370151943208435, 'colsample_bytree': 0.5039451809241656, 'reg_alpha': 4.83059831237253, 'reg_lambda': 0.6661061336328783, 'n_estimators': 200}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.050495 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.049213 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.065897 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:26:21,985] Trial 8 finished with value: 0.8404464871958296 and parameters: {'learning_rate': 0.06087675195147654, 'num_leaves': 23, 'max_depth': 2, 'min_child_samples': 45, 'subsample': 0.9395597135086334, 'colsample_bytree': 0.7482197461641972, 'reg_alpha': 3.8980259526329206, 'reg_lambda': 1.321250394465624, 'n_estimators': 800}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.053830 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.039423 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.038685 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:27:34,123] Trial 9 finished with value: 0.8188733925008612 and parameters: {'learning_rate': 0.13429568749138976, 'num_leaves': 89, 'max_depth': 6, 'min_child_samples': 89, 'subsample': 0.10088967070651267, 'colsample_bytree': 0.1907727248908414, 'reg_alpha': 1.5033098376321274, 'reg_lambda': 4.955014382064883, 'n_estimators': 500}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.053498 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.067937 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.052600 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:28:03,638] Trial 10 finished with value: 0.8427555222758072 and parameters: {'learning_rate': 0.19401858283072937, 'num_leaves': 45, 'max_depth': 15, 'min_child_samples': 66, 'subsample': 0.6600980593444014, 'colsample_bytree': 0.527864574939081, 'reg_alpha': 3.6062827431849476, 'reg_lambda': 2.157063909596108, 'n_estimators': 100}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.051079 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.047260 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.064321 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:29:05,926] Trial 11 finished with value: 0.8427060915125595 and parameters: {'learning_rate': 0.10654078052705086, 'num_leaves': 37, 'max_depth': 12, 'min_child_samples': 70, 'subsample': 0.4199406013567056, 'colsample_bytree': 0.4467200487713657, 'reg_alpha': 0.10609946282280358, 'reg_lambda': 3.9262616938581134, 'n_estimators': 300}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.038474 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.047368 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.040565 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:31:17,470] Trial 12 finished with value: 0.7937827579969724 and parameters: {'learning_rate': 0.14148249025789641, 'num_leaves': 20, 'max_depth': 4, 'min_child_samples': 36, 'subsample': 0.28270979901296084, 'colsample_bytree': 0.13285169313398476, 'reg_alpha': 0.45722562697970304, 'reg_lambda': 2.8615441498932843, 'n_estimators': 1000}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.050588 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.052535 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.054395 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:32:30,889] Trial 13 finished with value: 0.8401277475103089 and parameters: {'learning_rate': 0.07105900730851747, 'num_leaves': 78, 'max_depth': 11, 'min_child_samples': 10, 'subsample': 0.7412457154966591, 'colsample_bytree': 0.2730272982781388, 'reg_alpha': 0.7659192653607501, 'reg_lambda': 4.6161893182195834, 'n_estimators': 300}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.052847 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.066418 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.051098 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:33:32,867] Trial 14 finished with value: 0.8424497428700064 and parameters: {'learning_rate': 0.09486649060557152, 'num_leaves': 38, 'max_depth': -1, 'min_child_samples': 60, 'subsample': 0.504616191524447, 'colsample_bytree': 0.5717303875912476, 'reg_alpha': 1.1864156949899718, 'reg_lambda': 3.6268269045464216, 'n_estimators': 300}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.053560 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.055257 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.055075 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:36:10,915] Trial 15 finished with value: 0.8429334082602536 and parameters: {'learning_rate': 0.16718632717448975, 'num_leaves': 55, 'max_depth': 5, 'min_child_samples': 82, 'subsample': 0.2997358177698427, 'colsample_bytree': 0.5573888533810563, 'reg_alpha': 2.487861022117328, 'reg_lambda': 1.5989920341768564, 'n_estimators': 900}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.055077 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.049149 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.066548 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:37:07,158] Trial 16 finished with value: 0.8421644940443344 and parameters: {'learning_rate': 0.12113438824044381, 'num_leaves': 34, 'max_depth': 3, 'min_child_samples': 56, 'subsample': 0.7887673207355282, 'colsample_bytree': 0.4597684383977097, 'reg_alpha': 4.178790103169402, 'reg_lambda': 2.7529472234699894, 'n_estimators': 400}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.051905 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.052747 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.035559 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:42:22,036] Trial 17 finished with value: 0.8406710538506499 and parameters: {'learning_rate': 0.04236596163152644, 'num_leaves': 63, 'max_depth': 10, 'min_child_samples': 95, 'subsample': 0.5993485941792462, 'colsample_bytree': 0.2716155202414695, 'reg_alpha': 2.380770653272017, 'reg_lambda': 0.2110119188887829, 'n_estimators': 800}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.043799 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.056942 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.057474 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:43:16,267] Trial 18 finished with value: 0.843291995489924 and parameters: {'learning_rate': 0.08758119256552171, 'num_leaves': 81, 'max_depth': 15, 'min_child_samples': 78, 'subsample': 0.26962572229844317, 'colsample_bytree': 0.6493178114284627, 'reg_alpha': 4.840773233709149, 'reg_lambda': 4.245308433004156, 'n_estimators': 200}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044853 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.040887 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.051878 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:45:20,837] Trial 19 finished with value: 0.8427894883456842 and parameters: {'learning_rate': 0.1616312341744008, 'num_leaves': 47, 'max_depth': 12, 'min_child_samples': 30, 'subsample': 0.8144720935107891, 'colsample_bytree': 0.3836273875799342, 'reg_alpha': 3.0694179720649446, 'reg_lambda': 2.3242792183728356, 'n_estimators': 700}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.041945 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.038836 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.049520 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:45:49,322] Trial 20 finished with value: 0.7667825132608902 and parameters: {'learning_rate': 0.05408858370509449, 'num_leaves': 31, 'max_depth': 1, 'min_child_samples': 67, 'subsample': 0.9968443345097455, 'colsample_bytree': 0.8519332732556829, 'reg_alpha': 0.9779338915733548, 'reg_lambda': 3.353188846375617, 'n_estimators': 400}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.043903 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.049439 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.051333 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:46:35,450] Trial 21 finished with value: 0.8427649424744852 and parameters: {'learning_rate': 0.08757234561751288, 'num_leaves': 82, 'max_depth': 15, 'min_child_samples': 79, 'subsample': 0.2425780079735833, 'colsample_bytree': 0.655584410870687, 'reg_alpha': 4.331104567156354, 'reg_lambda': 4.208172620838156, 'n_estimators': 200}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.054552 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.048343 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.051347 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:47:24,413] Trial 22 finished with value: 0.8429892432529514 and parameters: {'learning_rate': 0.11718836991845487, 'num_leaves': 96, 'max_depth': 13, 'min_child_samples': 88, 'subsample': 0.34751738285205613, 'colsample_bytree': 0.6275380311777599, 'reg_alpha': 4.812270985116484, 'reg_lambda': 4.154890601827593, 'n_estimators': 200}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.040049 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.043067 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.038421 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:48:10,136] Trial 23 finished with value: 0.8430091469100901 and parameters: {'learning_rate': 0.09419773451066397, 'num_leaves': 74, 'max_depth': 14, 'min_child_samples': 89, 'subsample': 0.21570218607480146, 'colsample_bytree': 0.46786069232957506, 'reg_alpha': 4.930792233759262, 'reg_lambda': 4.615401567160009, 'n_estimators': 200}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.049739 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.066738 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.054441 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:50:03,638] Trial 24 finished with value: 0.8420371568531881 and parameters: {'learning_rate': 0.12312625878779294, 'num_leaves': 98, 'max_depth': 5, 'min_child_samples': 73, 'subsample': 0.37370348462966785, 'colsample_bytree': 0.6012536521012449, 'reg_alpha': 3.6508489069915555, 'reg_lambda': 3.1666588292495206, 'n_estimators': 400}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.051728 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.051971 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.053576 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:50:34,157] Trial 25 finished with value: 0.837625979842731 and parameters: {'learning_rate': 0.0835253349745936, 'num_leaves': 120, 'max_depth': 10, 'min_child_samples': 52, 'subsample': 0.5733500060754074, 'colsample_bytree': 0.2628504701991603, 'reg_alpha': 3.464884096309068, 'reg_lambda': 4.457323850976531, 'n_estimators': 100}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.042871 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.057599 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.049632 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:53:34,954] Trial 26 finished with value: 0.8423933559828972 and parameters: {'learning_rate': 0.06802958723677621, 'num_leaves': 66, 'max_depth': -1, 'min_child_samples': 61, 'subsample': 0.1957615040339021, 'colsample_bytree': 0.4906189445592085, 'reg_alpha': 4.455222383305399, 'reg_lambda': 4.98374706818222, 'n_estimators': 900}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.045605 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.062288 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.057131 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1041
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


[I 2026-01-04 12:54:13,155] Trial 27 finished with value: 0.843076343781866 and parameters: {'learning_rate': 0.14463340672888714, 'num_leaves': 86, 'max_depth': 7, 'min_child_samples': 84, 'subsample': 0.4937359851590104, 'colsample_bytree': 0.8301130533339118, 'reg_alpha': 3.980713734081538, 'reg_lambda': 3.6578052041726288, 'n_estimators': 200}. Best is trial 7 with value: 0.843454664545216.


[LightGBM] [Info] Number of positive: 202130, number of negative: 1010650
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.049993 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1310
[LightGBM] [Info] Number of data points in the train set: 1212780, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




[LightGBM] [Info] Number of positive: 202130, number of negative: 1010651
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.048116 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1040
[LightGBM] [Info] Number of data points in the train set: 1212781, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439


In [149]:
print(study.best_params)
print(study.best_value)

{'learning_rate': 0.056870550481473985, 'num_leaves': 49, 'max_depth': 4, 'min_child_samples': 86, 'subsample': 0.3405305779866419, 'colsample_bytree': 0.7669391171858071, 'reg_alpha': 1.102479528599375, 'reg_lambda': 1.2334000948279056, 'n_estimators': 400}
0.8534142900533228


# Model With Best Parameteres

In [31]:
# create model
best_params = {'objective': 'binary',
        'metric': 'binary_logloss',
        'boosting_type': 'gbdt',
        'learning_rate': 0.056870550481473985, 'num_leaves': 49, 'max_depth': 4, 'min_child_samples': 86, 'subsample': 0.3405305779866419, 
        'colsample_bytree': 0.7669391171858071, 'reg_alpha': 1.102479528599375,
        'reg_lambda': 1.2334000948279056, 'n_estimators': 400,
        'n_jobs': -1,'is_unbalance': True}

sampler = SMOTE(sampling_strategy=0.2, random_state=42)
lgbm = LGBMClassifier(**best_params,random_state=42)
pipeline = Pipeline([
      ('preprocessing',preprocessor),
      ('sampling',sampler),
      ('model',lgbm)
])

pipeline.fit(x_train,y_train)
y_pred = pipeline.predict(x_test)
print(accuracy_score(y_test,y_pred))

[LightGBM] [Info] Number of positive: 303195, number of negative: 1515976
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.017314 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 823
[LightGBM] [Info] Number of data points in the train set: 1819171, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609439
[LightGBM] [Info] Start training from score -1.609439




0.9400809503784693


In [32]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       1.00      0.94      0.97    379030
           1       0.06      0.99      0.11      1450

    accuracy                           0.94    380480
   macro avg       0.53      0.96      0.54    380480
weighted avg       1.00      0.94      0.97    380480



In [33]:
y_prob = pipeline.predict_proba(x_test)[:,1]

precision,recall,pr_thresold = precision_recall_curve(y_test,y_prob)



In [34]:
f1_scores = 2 * (precision * recall) / (precision + recall + 1e-12)

In [35]:
best_idx = np.nanargmax(f1_scores[:-1]) # exclude last element as it corresponds to threshold=none
best_threshold = pr_thresold[best_idx]
best_precision = precision[best_idx]
best_recall = recall[best_idx]
best_f1 = f1_scores[best_idx]
print(f"Best threshold by F1: {best_threshold:.4f}")
print(f"Precision={best_precision:.4f}, Recall={best_recall:.4f}, F1={best_f1:.4f}")

Best threshold by F1: 0.9942
Precision=0.9784, Recall=0.7186, F1=0.8286
