In [1]:
## Importing Required Libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import VarianceThreshold

from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, accuracy_score

from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.tree import plot_tree

import optuna
from sklearn.ensemble import RandomForestClassifier
import time
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import log_loss
from sklearn.metrics import hinge_loss
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

import shap

In [2]:
path = "/Users/abhi/Downloads/randomairline.csv"
train_data = pd.read_csv(path).drop('Unnamed: 0', axis=1)

print(train_data.shape)
print(train_data.info())

(103904, 24)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103904 entries, 0 to 103903
Data columns (total 24 columns):
 #   Column                             Non-Null Count   Dtype  
---  ------                             --------------   -----  
 0   id                                 103904 non-null  int64  
 1   Gender                             103904 non-null  object 
 2   Customer Type                      103904 non-null  object 
 3   Age                                103904 non-null  int64  
 4   Type of Travel                     103904 non-null  object 
 5   Class                              103904 non-null  object 
 6   Flight Distance                    103904 non-null  int64  
 7   Inflight wifi service              103904 non-null  int64  
 8   Departure/Arrival time convenient  103904 non-null  int64  
 9   Ease of Online booking             103904 non-null  int64  
 10  Gate location                      103904 non-null  int64  
 11  Food and drink            

In [3]:
path = "/Users/abhi/Downloads/testingshiz.csv"
test_data = pd.read_csv(path).drop('Unnamed: 0', axis=1)

print(test_data.shape)
print(test_data.info())

(25976, 24)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25976 entries, 0 to 25975
Data columns (total 24 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   id                                 25976 non-null  int64  
 1   Gender                             25976 non-null  object 
 2   Customer Type                      25976 non-null  object 
 3   Age                                25976 non-null  int64  
 4   Type of Travel                     25976 non-null  object 
 5   Class                              25976 non-null  object 
 6   Flight Distance                    25976 non-null  int64  
 7   Inflight wifi service              25976 non-null  int64  
 8   Departure/Arrival time convenient  25976 non-null  int64  
 9   Ease of Online booking             25976 non-null  int64  
 10  Gate location                      25976 non-null  int64  
 11  Food and drink                     25976 n

In [4]:
train_data.dropna(subset='Arrival Delay in Minutes', inplace= True)
print(train_data.isna().sum())

id                                   0
Gender                               0
Customer Type                        0
Age                                  0
Type of Travel                       0
Class                                0
Flight Distance                      0
Inflight wifi service                0
Departure/Arrival time convenient    0
Ease of Online booking               0
Gate location                        0
Food and drink                       0
Online boarding                      0
Seat comfort                         0
Inflight entertainment               0
On-board service                     0
Leg room service                     0
Baggage handling                     0
Checkin service                      0
Inflight service                     0
Cleanliness                          0
Departure Delay in Minutes           0
Arrival Delay in Minutes             0
satisfaction                         0
dtype: int64


In [5]:
test_data.dropna(subset='Arrival Delay in Minutes', inplace= True)
print(test_data.isna().sum())

id                                   0
Gender                               0
Customer Type                        0
Age                                  0
Type of Travel                       0
Class                                0
Flight Distance                      0
Inflight wifi service                0
Departure/Arrival time convenient    0
Ease of Online booking               0
Gate location                        0
Food and drink                       0
Online boarding                      0
Seat comfort                         0
Inflight entertainment               0
On-board service                     0
Leg room service                     0
Baggage handling                     0
Checkin service                      0
Inflight service                     0
Cleanliness                          0
Departure Delay in Minutes           0
Arrival Delay in Minutes             0
satisfaction                         0
dtype: int64


In [11]:
data = train_data.copy()

data['satisfaction'] = data['satisfaction'].apply(lambda x: 1 if x == 'satisfied' else 0)
print(data['satisfaction'].value_counts())


label_encoder = LabelEncoder()

data['Gender'] = label_encoder.fit_transform(data['Gender'])

data = pd.get_dummies(data, columns=['Customer Type', 'Type of Travel', 'Class'], drop_first=True)


data['Ease of Booking * Online Boarding'] = data['Ease of Online booking'] * data['Online boarding']

data['Flight Distance * Seat Comfort'] = data['Flight Distance'] * data['Seat comfort']


data['Delay per Distance'] = (
    (data['Departure Delay in Minutes'] + data['Arrival Delay in Minutes']) / 
    data['Flight Distance']).replace([float('inf'), -float('inf')], 0)

scaler = MinMaxScaler()

data['Delay per Distance'] = scaler.fit_transform(data[['Delay per Distance']])


service_features = [
    'Inflight wifi service', 'Departure/Arrival time convenient', 'Ease of Online booking',
    'Gate location', 'Food and drink', 'Online boarding', 'Seat comfort',
    'Inflight entertainment', 'On-board service', 'Leg room service',
    'Baggage handling', 'Checkin service', 'Inflight service', 'Cleanliness']

# Compute the mean score for service quality
data['Service Quality Score'] = data[service_features].mean(axis=1)

# Normalize the new feature for consistent scaling
data['Service Quality Score'] = scaler.fit_transform(data[['Service Quality Score']])


data.drop(['Ease of Online booking','Online boarding','Flight Distance',
                 'Seat comfort','Departure Delay in Minutes','Arrival Delay in Minutes',
                'Flight Distance'], axis=1, inplace=True)


numeric_data = data.select_dtypes(include=['number'])


# Remove highly correlated features
corr_matrix = numeric_data.corr().abs()
upper = corr_matrix.where(
    np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))

to_drop = [column for column in upper.columns if any(upper[column] > 0.8)]

data_reduced = data.drop(columns=to_drop)

0    58697
1    44897
Name: satisfaction, dtype: int64


In [12]:
data1 = test_data.copy()

data1['satisfaction'] = data1['satisfaction'].apply(lambda x: 1 if x == 'satisfied' else 0)
print(data1['satisfaction'].value_counts())


label_encoder = LabelEncoder()

data1['Gender'] = label_encoder.fit_transform(data1['Gender'])

data1 = pd.get_dummies(data1, columns=['Customer Type', 'Type of Travel', 'Class'], drop_first=True)


data1['Ease of Booking * Online Boarding'] = data1['Ease of Online booking'] * data1['Online boarding']

data1['Flight Distance * Seat Comfort'] = data1['Flight Distance'] * data1['Seat comfort']


data1['Delay per Distance'] = (
    (data1['Departure Delay in Minutes'] + data1['Arrival Delay in Minutes']) / 
    data1['Flight Distance']).replace([float('inf'), -float('inf')], 0)

scaler = MinMaxScaler()

data1['Delay per Distance'] = scaler.fit_transform(data1[['Delay per Distance']])


service_features = [
    'Inflight wifi service', 'Departure/Arrival time convenient', 'Ease of Online booking',
    'Gate location', 'Food and drink', 'Online boarding', 'Seat comfort',
    'Inflight entertainment', 'On-board service', 'Leg room service',
    'Baggage handling', 'Checkin service', 'Inflight service', 'Cleanliness']

# Compute the mean score for service quality
data1['Service Quality Score'] = data1[service_features].mean(axis=1)

# Normalize the new feature for consistent scaling
data1['Service Quality Score'] = scaler.fit_transform(data1[['Service Quality Score']])


data1.drop(['Ease of Online booking','Online boarding','Flight Distance',
                 'Seat comfort','Departure Delay in Minutes','Arrival Delay in Minutes',
                'Flight Distance'], axis=1, inplace=True)


numeric_data1 = data1.select_dtypes(include=['number'])


# Remove highly correlated features
corr_matrix = numeric_data1.corr().abs()
upper = corr_matrix.where(
    np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))

to_drop = [column for column in upper.columns if any(upper[column] > 0.8)]

data_reduced1 = data1.drop(columns=to_drop)

0    14528
1    11365
Name: satisfaction, dtype: int64


In [13]:
X = data_reduced.drop(columns=['satisfaction'])
y = data_reduced['satisfaction']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [14]:
# Define Optuna Objective Function

def objective(trial):
    param = {
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2),
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 50),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0)
    }
    
    model = LGBMClassifier(**param, random_state=42)
    score = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy').mean()
    return score

# Optimize Hyperparameters with Optuna
start_time = time.time()
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)  # Adjust n_trials based on available compute
end_time = time.time()

# Train LightGBM with Best Parameters
best_params = study.best_params
print("Best Parameters:", best_params)

final_model = LGBMClassifier(**best_params, random_state=42)
final_model.fit(X_train, y_train)

# Evaluate the Model
y_pred_light = final_model.predict(X_test)

accuracy_train = accuracy_score(y_test, y_pred_light)
conf_matrix_train = confusion_matrix(y_test, y_pred_light)
class_report_train = classification_report(y_test, y_pred_light)

# Print evaluation results
print("Accuracy:", accuracy_train)
print("Confusion Matrix:")
print(conf_matrix_train)
print("Classification Report:")
print(class_report_train)

[I 2024-11-24 16:35:23,829] A new study created in memory with name: no-name-04c6f6bf-3a31-47bb-b2e1-82a72249d6c4


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001473 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001207 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:35:25,898] Trial 0 finished with value: 0.9590588235294117 and parameters: {'num_leaves': 70, 'learning_rate': 0.16198248467442106, 'n_estimators': 242, 'max_depth': 4, 'min_child_samples': 25, 'colsample_bytree': 0.6731667626832069, 'subsample': 0.6927274234076806}. Best is trial 0 with value: 0.9590588235294117.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001318 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001379 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:35:36,403] Trial 1 finished with value: 0.9624012066365009 and parameters: {'num_leaves': 90, 'learning_rate': 0.0653370496093011, 'n_estimators': 496, 'max_depth': 10, 'min_child_samples': 47, 'colsample_bytree': 0.8270965794710655, 'subsample': 0.7834882886990084}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001075 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001288 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:35:45,015] Trial 2 finished with value: 0.9603016591251885 and parameters: {'num_leaves': 99, 'learning_rate': 0.19627308603802407, 'n_estimators': 444, 'max_depth': 7, 'min_child_samples': 32, 'colsample_bytree': 0.8850054919202062, 'subsample': 0.7734099133350681}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001401 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001498 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:35:49,209] Trial 3 finished with value: 0.9623408748114631 and parameters: {'num_leaves': 53, 'learning_rate': 0.06647372641213563, 'n_estimators': 240, 'max_depth': 8, 'min_child_samples': 35, 'colsample_bytree': 0.6734501268162083, 'subsample': 0.8820368242507253}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001274 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000946 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:35:54,588] Trial 4 finished with value: 0.9623891402714932 and parameters: {'num_leaves': 64, 'learning_rate': 0.0645663073726519, 'n_estimators': 289, 'max_depth': 8, 'min_child_samples': 50, 'colsample_bytree': 0.6350402945932264, 'subsample': 0.6259123713810381}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001584 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001108 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:35:56,879] Trial 5 finished with value: 0.95926395173454 and parameters: {'num_leaves': 75, 'learning_rate': 0.17314385156013598, 'n_estimators': 239, 'max_depth': 4, 'min_child_samples': 35, 'colsample_bytree': 0.6839726734492783, 'subsample': 0.9940539574765274}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001431 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001445 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:35:58,743] Trial 6 finished with value: 0.95857616892911 and parameters: {'num_leaves': 74, 'learning_rate': 0.1729543012315859, 'n_estimators': 203, 'max_depth': 4, 'min_child_samples': 26, 'colsample_bytree': 0.7980208421833257, 'subsample': 0.9005428362253354}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001764 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001793 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:36:00,653] Trial 7 finished with value: 0.9439638009049774 and parameters: {'num_leaves': 70, 'learning_rate': 0.04020568658882739, 'n_estimators': 214, 'max_depth': 4, 'min_child_samples': 15, 'colsample_bytree': 0.7002761867453702, 'subsample': 0.9878132541182294}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001068 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001376 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:36:05,535] Trial 8 finished with value: 0.947101055806938 and parameters: {'num_leaves': 39, 'learning_rate': 0.01100129050385079, 'n_estimators': 373, 'max_depth': 6, 'min_child_samples': 41, 'colsample_bytree': 0.6878840763138243, 'subsample': 0.6405575161324417}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001275 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001714 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:36:07,510] Trial 9 finished with value: 0.9543891402714932 and parameters: {'num_leaves': 64, 'learning_rate': 0.08394801913433288, 'n_estimators': 226, 'max_depth': 4, 'min_child_samples': 38, 'colsample_bytree': 0.9331310427098789, 'subsample': 0.7410777185945188}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001634 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001399 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:36:10,569] Trial 10 finished with value: 0.9623288084464555 and parameters: {'num_leaves': 100, 'learning_rate': 0.12038900272506517, 'n_estimators': 114, 'max_depth': 10, 'min_child_samples': 50, 'colsample_bytree': 0.8035818980187902, 'subsample': 0.8504473205987619}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000937 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001052 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:36:21,777] Trial 11 finished with value: 0.9616892911010557 and parameters: {'num_leaves': 88, 'learning_rate': 0.11327515219647492, 'n_estimators': 495, 'max_depth': 10, 'min_child_samples': 50, 'colsample_bytree': 0.7999774727322062, 'subsample': 0.600241881708047}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001074 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001024 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:36:26,904] Trial 12 finished with value: 0.9622202111613877 and parameters: {'num_leaves': 43, 'learning_rate': 0.05801881293198181, 'n_estimators': 330, 'max_depth': 9, 'min_child_samples': 44, 'colsample_bytree': 0.6093272415883961, 'subsample': 0.6991049601396289}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001719 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001229 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:36:31,575] Trial 13 finished with value: 0.9597707390648565 and parameters: {'num_leaves': 24, 'learning_rate': 0.02840633984076734, 'n_estimators': 385, 'max_depth': 8, 'min_child_samples': 45, 'colsample_bytree': 0.8728451441113636, 'subsample': 0.8070595589323828}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001038 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001278 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:36:36,795] Trial 14 finished with value: 0.9614479638009051 and parameters: {'num_leaves': 83, 'learning_rate': 0.08865431314321459, 'n_estimators': 313, 'max_depth': 6, 'min_child_samples': 10, 'colsample_bytree': 0.7598549655131489, 'subsample': 0.6544317878114944}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001337 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001434 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:36:43,994] Trial 15 finished with value: 0.9614358974358975 and parameters: {'num_leaves': 55, 'learning_rate': 0.12505975703605887, 'n_estimators': 412, 'max_depth': 9, 'min_child_samples': 46, 'colsample_bytree': 0.9818591899913891, 'subsample': 0.7433616840237173}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014324 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002953 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[Lig

[I 2024-11-24 16:36:58,579] Trial 16 finished with value: 0.9618340874811462 and parameters: {'num_leaves': 88, 'learning_rate': 0.05731389553123973, 'n_estimators': 499, 'max_depth': 9, 'min_child_samples': 41, 'colsample_bytree': 0.6048486572185267, 'subsample': 0.929524962064365}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001087 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001076 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:37:01,026] Trial 17 finished with value: 0.9613755656108598 and parameters: {'num_leaves': 45, 'learning_rate': 0.09620571328840884, 'n_estimators': 149, 'max_depth': 7, 'min_child_samples': 49, 'colsample_bytree': 0.8542770208420304, 'subsample': 0.8161767323765499}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001044 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001203 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:37:04,676] Trial 18 finished with value: 0.9615444947209653 and parameters: {'num_leaves': 28, 'learning_rate': 0.07097200296195759, 'n_estimators': 287, 'max_depth': 8, 'min_child_samples': 22, 'colsample_bytree': 0.7552850105022432, 'subsample': 0.6049592807306726}. Best is trial 1 with value: 0.9624012066365009.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001036 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001113 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:37:13,601] Trial 19 finished with value: 0.9631131221719457 and parameters: {'num_leaves': 84, 'learning_rate': 0.04004646046937793, 'n_estimators': 351, 'max_depth': 10, 'min_child_samples': 41, 'colsample_bytree': 0.739616888096926, 'subsample': 0.6931136725427435}. Best is trial 19 with value: 0.9631131221719457.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001258 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001249 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:37:25,637] Trial 20 finished with value: 0.9634147812971342 and parameters: {'num_leaves': 83, 'learning_rate': 0.027042320572648243, 'n_estimators': 449, 'max_depth': 10, 'min_child_samples': 40, 'colsample_bytree': 0.7452810938156927, 'subsample': 0.6912731102430536}. Best is trial 20 with value: 0.9634147812971342.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000969 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001512 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:37:37,060] Trial 21 finished with value: 0.9630165912518853 and parameters: {'num_leaves': 82, 'learning_rate': 0.037646057487368476, 'n_estimators': 448, 'max_depth': 10, 'min_child_samples': 40, 'colsample_bytree': 0.7450910953506358, 'subsample': 0.684127873556999}. Best is trial 20 with value: 0.9634147812971342.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001149 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001444 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:37:48,590] Trial 22 finished with value: 0.9613634992458522 and parameters: {'num_leaves': 80, 'learning_rate': 0.0111425267422827, 'n_estimators': 446, 'max_depth': 10, 'min_child_samples': 40, 'colsample_bytree': 0.7381296817666664, 'subsample': 0.6813214232520194}. Best is trial 20 with value: 0.9634147812971342.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001339 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001739 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:38:01,090] Trial 23 finished with value: 0.9625460030165913 and parameters: {'num_leaves': 92, 'learning_rate': 0.03593041371157384, 'n_estimators': 447, 'max_depth': 9, 'min_child_samples': 36, 'colsample_bytree': 0.7276876323596172, 'subsample': 0.7224148218675054}. Best is trial 20 with value: 0.9634147812971342.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001354 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001300 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:38:11,907] Trial 24 finished with value: 0.9631372549019608 and parameters: {'num_leaves': 81, 'learning_rate': 0.02629212591904513, 'n_estimators': 354, 'max_depth': 10, 'min_child_samples': 31, 'colsample_bytree': 0.7703097979409551, 'subsample': 0.667093267968596}. Best is trial 20 with value: 0.9634147812971342.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001364 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001322 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:38:22,036] Trial 25 finished with value: 0.9622322775263952 and parameters: {'num_leaves': 80, 'learning_rate': 0.023071600245146852, 'n_estimators': 344, 'max_depth': 9, 'min_child_samples': 30, 'colsample_bytree': 0.779314959934398, 'subsample': 0.6455076004180795}. Best is trial 20 with value: 0.9634147812971342.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001248 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001240 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:38:25,243] Trial 26 finished with value: 0.9467752639517346 and parameters: {'num_leaves': 94, 'learning_rate': 0.05164241330659243, 'n_estimators': 366, 'max_depth': 3, 'min_child_samples': 31, 'colsample_bytree': 0.8437966319422788, 'subsample': 0.721180319052556}. Best is trial 20 with value: 0.9634147812971342.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001279 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001784 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:38:35,772] Trial 27 finished with value: 0.9628838612368025 and parameters: {'num_leaves': 72, 'learning_rate': 0.024268396060310227, 'n_estimators': 388, 'max_depth': 10, 'min_child_samples': 19, 'colsample_bytree': 0.7254734398516324, 'subsample': 0.6764196441674658}. Best is trial 20 with value: 0.9634147812971342.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001415 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001421 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:38:46,137] Trial 28 finished with value: 0.9629200603318251 and parameters: {'num_leaves': 63, 'learning_rate': 0.04524193423590255, 'n_estimators': 401, 'max_depth': 9, 'min_child_samples': 29, 'colsample_bytree': 0.6461913901227563, 'subsample': 0.7576077500641212}. Best is trial 20 with value: 0.9634147812971342.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001421 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001482 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:38:50,903] Trial 29 finished with value: 0.9594811463046756 and parameters: {'num_leaves': 78, 'learning_rate': 0.07853377345403827, 'n_estimators': 281, 'max_depth': 5, 'min_child_samples': 26, 'colsample_bytree': 0.7104676481555459, 'subsample': 0.7075233540610709}. Best is trial 20 with value: 0.9634147812971342.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001687 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001400 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:39:01,498] Trial 30 finished with value: 0.9612428355957767 and parameters: {'num_leaves': 85, 'learning_rate': 0.14490332482427587, 'n_estimators': 346, 'max_depth': 7, 'min_child_samples': 43, 'colsample_bytree': 0.6554007292369692, 'subsample': 0.6586924792139026}. Best is trial 20 with value: 0.9634147812971342.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001443 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001425 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:39:16,533] Trial 31 finished with value: 0.9632096530920059 and parameters: {'num_leaves': 95, 'learning_rate': 0.03301568489623578, 'n_estimators': 464, 'max_depth': 10, 'min_child_samples': 38, 'colsample_bytree': 0.766192987200835, 'subsample': 0.6858917683119907}. Best is trial 20 with value: 0.9634147812971342.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001644 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001359 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:39:31,321] Trial 32 finished with value: 0.9630527903469079 and parameters: {'num_leaves': 96, 'learning_rate': 0.02121497665103647, 'n_estimators': 421, 'max_depth': 10, 'min_child_samples': 37, 'colsample_bytree': 0.7741448534577152, 'subsample': 0.6720440767700036}. Best is trial 20 with value: 0.9634147812971342.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001062 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001292 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:39:44,985] Trial 33 finished with value: 0.9632458521870285 and parameters: {'num_leaves': 90, 'learning_rate': 0.033911845813360694, 'n_estimators': 466, 'max_depth': 10, 'min_child_samples': 33, 'colsample_bytree': 0.8283967955335452, 'subsample': 0.6276188764789529}. Best is trial 20 with value: 0.9634147812971342.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001471 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001229 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:40:00,853] Trial 34 finished with value: 0.961158371040724 and parameters: {'num_leaves': 94, 'learning_rate': 0.011389466876743607, 'n_estimators': 473, 'max_depth': 9, 'min_child_samples': 34, 'colsample_bytree': 0.8200133640880433, 'subsample': 0.6228655716754788}. Best is trial 20 with value: 0.9634147812971342.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001211 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001117 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:40:13,496] Trial 35 finished with value: 0.9624012066365009 and parameters: {'num_leaves': 89, 'learning_rate': 0.050800514862885865, 'n_estimators': 475, 'max_depth': 8, 'min_child_samples': 28, 'colsample_bytree': 0.9015773158812391, 'subsample': 0.6258776834861112}. Best is trial 20 with value: 0.9634147812971342.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001079 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001564 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:40:26,344] Trial 36 finished with value: 0.963523378582202 and parameters: {'num_leaves': 100, 'learning_rate': 0.02920679737421864, 'n_estimators': 432, 'max_depth': 10, 'min_child_samples': 33, 'colsample_bytree': 0.820926771629586, 'subsample': 0.7872916460054615}. Best is trial 36 with value: 0.963523378582202.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000955 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001471 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:40:38,845] Trial 37 finished with value: 0.9619547511312219 and parameters: {'num_leaves': 100, 'learning_rate': 0.07290772399796783, 'n_estimators': 427, 'max_depth': 9, 'min_child_samples': 33, 'colsample_bytree': 0.9115666419656746, 'subsample': 0.7759271513412673}. Best is trial 36 with value: 0.963523378582202.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000943 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001496 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:40:54,491] Trial 38 finished with value: 0.9615324283559576 and parameters: {'num_leaves': 96, 'learning_rate': 0.1973225505321248, 'n_estimators': 486, 'max_depth': 10, 'min_child_samples': 38, 'colsample_bytree': 0.836653138299815, 'subsample': 0.7908716997939478}. Best is trial 36 with value: 0.963523378582202.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001471 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001352 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:41:09,954] Trial 39 finished with value: 0.9620392156862746 and parameters: {'num_leaves': 90, 'learning_rate': 0.0320574684215519, 'n_estimators': 474, 'max_depth': 8, 'min_child_samples': 33, 'colsample_bytree': 0.8123544369147316, 'subsample': 0.824467594792128}. Best is trial 36 with value: 0.963523378582202.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001651 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001291 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:41:22,321] Trial 40 finished with value: 0.9627149321266968 and parameters: {'num_leaves': 68, 'learning_rate': 0.06159146072081405, 'n_estimators': 463, 'max_depth': 10, 'min_child_samples': 35, 'colsample_bytree': 0.8605404068177025, 'subsample': 0.8350061993841106}. Best is trial 36 with value: 0.963523378582202.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001411 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001205 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:41:36,854] Trial 41 finished with value: 0.9630286576168929 and parameters: {'num_leaves': 76, 'learning_rate': 0.02288058034823253, 'n_estimators': 438, 'max_depth': 10, 'min_child_samples': 32, 'colsample_bytree': 0.778293796497483, 'subsample': 0.660488311746975}. Best is trial 36 with value: 0.963523378582202.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001304 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001473 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:41:49,703] Trial 42 finished with value: 0.9629683257918552 and parameters: {'num_leaves': 97, 'learning_rate': 0.04663567073849437, 'n_estimators': 407, 'max_depth': 10, 'min_child_samples': 27, 'colsample_bytree': 0.7831725378490687, 'subsample': 0.7200585305973346}. Best is trial 36 with value: 0.963523378582202.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001778 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001806 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:42:03,277] Trial 43 finished with value: 0.9623408748114631 and parameters: {'num_leaves': 92, 'learning_rate': 0.01784518192633513, 'n_estimators': 459, 'max_depth': 9, 'min_child_samples': 23, 'colsample_bytree': 0.825198566820179, 'subsample': 0.6233819072059055}. Best is trial 36 with value: 0.963523378582202.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000999 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001132 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:42:15,931] Trial 44 finished with value: 0.9633665158371041 and parameters: {'num_leaves': 86, 'learning_rate': 0.031771012688120916, 'n_estimators': 425, 'max_depth': 10, 'min_child_samples': 38, 'colsample_bytree': 0.7127120448541161, 'subsample': 0.8594999713911479}. Best is trial 36 with value: 0.963523378582202.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001321 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001162 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:42:27,037] Trial 45 finished with value: 0.9631131221719457 and parameters: {'num_leaves': 100, 'learning_rate': 0.031970065797543364, 'n_estimators': 396, 'max_depth': 9, 'min_child_samples': 38, 'colsample_bytree': 0.7056692228609983, 'subsample': 0.9049691457884396}. Best is trial 36 with value: 0.963523378582202.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001168 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001078 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:42:38,643] Trial 46 finished with value: 0.9629562594268476 and parameters: {'num_leaves': 86, 'learning_rate': 0.04613381450067916, 'n_estimators': 432, 'max_depth': 10, 'min_child_samples': 43, 'colsample_bytree': 0.6686098439734521, 'subsample': 0.8555012876996396}. Best is trial 36 with value: 0.963523378582202.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001593 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001361 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:42:44,598] Trial 47 finished with value: 0.9599396681749622 and parameters: {'num_leaves': 92, 'learning_rate': 0.055165503308858166, 'n_estimators': 460, 'max_depth': 5, 'min_child_samples': 36, 'colsample_bytree': 0.885608042544767, 'subsample': 0.8794285036496197}. Best is trial 36 with value: 0.963523378582202.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001358 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001451 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:42:51,451] Trial 48 finished with value: 0.9617978883861238 and parameters: {'num_leaves': 87, 'learning_rate': 0.038429869746760964, 'n_estimators': 265, 'max_depth': 8, 'min_child_samples': 39, 'colsample_bytree': 0.7991170622605371, 'subsample': 0.7491288743551441}. Best is trial 36 with value: 0.963523378582202.


[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001442 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433409 -> initscore=-0.267957
[LightGBM] [Info] Start training from score -0.267957
[LightGBM] [Info] Number of positive: 28735, number of negative: 37565
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001319 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 987
[LightGBM] [Info] Number of data points in the train set: 66300, number of used features: 22
[LightGBM] [Info] [b

[I 2024-11-24 16:43:03,158] Trial 49 finished with value: 0.9613273001508296 and parameters: {'num_leaves': 97, 'learning_rate': 0.016778040779542536, 'n_estimators': 419, 'max_depth': 9, 'min_child_samples': 42, 'colsample_bytree': 0.6945233256913746, 'subsample': 0.7647982090559626}. Best is trial 36 with value: 0.963523378582202.


Best Parameters: {'num_leaves': 100, 'learning_rate': 0.02920679737421864, 'n_estimators': 432, 'max_depth': 10, 'min_child_samples': 33, 'colsample_bytree': 0.820926771629586, 'subsample': 0.7872916460054615}
[LightGBM] [Info] Number of positive: 35918, number of negative: 46957
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001630 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 82875, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.433400 -> initscore=-0.267994
[LightGBM] [Info] Start training from score -0.267994
Accuracy: 0.965007963704812
Confusion Matrix:
[[11504   236]
 [  489  8490]]
Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.98      0.97     11740
           1       0.97 

In [16]:
X_test = data1.drop(["satisfaction"], axis = 1)

In [17]:
y_pred_test = final_model.predict(X_test)

accuracy_test = accuracy_score(data1["satisfaction"], y_pred_test)
conf_matrix_test = confusion_matrix(data1["satisfaction"], y_pred_test)
class_report_test = classification_report(data1["satisfaction"], y_pred_test)

# Print evaluation results
print("Accuracy:", accuracy_test)
print("Confusion Matrix:")
print(conf_matrix_test)
print("Classification Report:")
print(class_report_test)

Accuracy: 0.9632333062989997
Confusion Matrix:
[[14203   325]
 [  627 10738]]
Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.98      0.97     14528
           1       0.97      0.94      0.96     11365

    accuracy                           0.96     25893
   macro avg       0.96      0.96      0.96     25893
weighted avg       0.96      0.96      0.96     25893

