In [1]:
import pandas as pd
import sklearn as sklearn
import numpy as np
import time
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lg
import optuna
import optuna.integration.lightgbm as lgb
import xgboost as xgb
import pickle
import os
from datetime import date

from typing import List, TypeVar, Dict
import abc

from sklearn.pipeline import Pipeline
from pandas.core.frame import DataFrame
from sklearn.preprocessing import FunctionTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RandomizedSearchCV

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import minmax_scale
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import PowerTransformer
from sklearn.model_selection import RepeatedKFold
from sklearn.svm import LinearSVR
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, roc_auc_score, precision_score, average_precision_score
from sklearn.metrics import classification_report, confusion_matrix
from kydavra import FisherSelector

from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

## Helper functions

In [2]:
_СORRELATION_MATRIX_PATH_ = 'corr_matrix.pcl'
_СHURN_PATH_ = 'churn_model.pcl'
_CHURN_PATH_REDUCED_ = 'churn_model_reduced.pcl' 
_MAX_FEATURES_ = 100

In [3]:
# https://towardsdatascience.com/interpreting-roc-curve-and-roc-auc-for-classification-evaluation-28ec3983f077
# https://neptune.ai/blog/f1-score-accuracy-roc-auc-pr-auc
class Metrics:
    
    def roc_auc(y_true, predicted):
        return roc_auc_score(y_true, predicted)
    
    def auc(y_true, predicted):
        return average_precision_score(y_true, predicted)
    
    def classification_report(y_true, predicted):
        return classification_report(y_true, predicted)
    
    def confusion_matrix(y_true, predicted):
        return confusion_matrix(y_true, predicted)

In [4]:
def get_float_cols(df:DataFrame) -> List[str]:
    return df.select_dtypes(include=float).columns.tolist()

def get_int_cols(df:DataFrame) -> List[str]:
    return df.select_dtypes(include=int).columns.tolist()

def get_number_cols(df:DataFrame) -> List[str]:
    return df.select_dtypes(np.number).columns.tolist()

def get_obj_cols(df:DataFrame) -> List[str]:
    return list(df.select_dtypes(include=object).columns)

def print_empty_values(df:DataFrame):
    col_names_with_na = list(df.isna().sum()[lambda x: x > 0].index)
    col_names_with_empty = list(df.isnull().sum()[lambda x: x > 0].index)
    result = set(col_names_with_na) | set(col_names_with_empty)    
    print('Columns with NA or empty: {0}'.format(result))
    
def get_empty_cols(df:DataFrame):
    return list(df.isnull().sum()[lambda x: x > 0].index)

In [5]:
def reduce_mem_usage(df, verbose=True) -> pd.DataFrame:
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2    
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)    
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose: print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

In [6]:
def split_test_train(df:DataFrame):
    test, train = df[df['ind'].eq('test')], df[df['ind'].eq('train')]
    test = test.drop(['ind'], axis=1)
    train = train.drop(['ind'], axis=1)
    return test, train
    
def combine_test_train(test:DataFrame, train:DataFrame):
    combine = pd.concat([test.assign(ind='test'), train.assign(ind='train')])
    target = train['target']
    test_ids = test['Id']
    return combine, target, test_ids

def combine_data(train_df, train_num, train_dpi, test_df, test_num, test_dpi):
    df_combine = pd.concat([train_df.assign(ind='train'), test_df.assign(ind='test')])
    df_combine_num = pd.concat([train_num.assign(ind='train'), test_num.assign(ind='test')])
    df_combine_dpi = pd.concat([train_dpi.assign(ind='train'), test_dpi.assign(ind='test')])
    return df_combine, df_combine_num, df_combine_dpi

In [7]:
def reduce_churn_data():

    if os.path.exists(_CHURN_PATH_REDUCED_) == False:
        if os.path.exists(_СHURN_PATH_) == True: 
    
            with open(_СHURN_PATH_, 'rb') as file:
                deserialized_object = pickle.load(file)

            deserialized_object = list(deserialized_object)
        
            # reduce size
            train_df= reduce_mem_usage(deserialized_object[1][1])
            train_num_reduced = reduce_mem_usage(deserialized_object[1][2])
            train_dpi_reduced = reduce_mem_usage(deserialized_object[1][3])

            test_df = reduce_mem_usage(deserialized_object[2][1])
            test_num_reduced = reduce_mem_usage(deserialized_object[2][2])
            test_dpi_reduced = reduce_mem_usage(deserialized_object[2][3])
            
            # dump data back
            deserialized_object = tuple([[train_df, train_num_reduced, train_dpi_reduced], [test_df, test_num_reduced, test_dpi_reduced]])
            pickle.dump(deserialized_object, open(_CHURN_PATH_REDUCED_, 'wb'))
    else:
        print(f'{_CHURN_PATH_REDUCED_} already exist')

In [8]:
def load_churn_data():

    with open(_СHURN_PATH_, 'rb') as file:
        deserialized_object = pickle.load(file)

    train_df = deserialized_object[1][1]
    train_num = deserialized_object[1][2]
    train_dpi = deserialized_object[1][3]

    test_df = deserialized_object[2][1]
    test_num = deserialized_object[2][2]
    test_dpi = deserialized_object[2][3]

    return train_df, train_num, train_dpi, test_df, test_num, test_dpi

In [9]:
def load_churn_reduced():

    with open(_CHURN_PATH_REDUCED_, 'rb') as file:
        deserialized_object = pickle.load(file)

    train_df = deserialized_object[0][0]
    train_num = deserialized_object[0][1]
    train_dpi = deserialized_object[0][2]

    test_df = deserialized_object[1][0]
    test_num = deserialized_object[1][1]
    test_dpi = deserialized_object[1][2]

    return train_df, train_num, train_dpi, test_df, test_num, test_dpi

In [10]:
def is_short_number(number:str) -> bool:
    if (number.isdigit() and len(number) <= 4):
        return True
    return False

def is_life(number:str) -> bool:
    if (len(number) == 12 and (number[2:5] in ['063', '093' ])):
        return True
    return False

def is_kyivstar(number:str) -> bool:
    if (len(number) == 12 and (number[2:5] in ['067', '097', '068', '098'])):
        return True
    return False

### Feature selectors

In [11]:
class FeatureHelper:
    
    def get_feature_correlation_df(corr_m, remove_duplicates=True, remove_self_correlations=True):
    
        corr_matrix_abs = corr_m.abs()
        corr_matrix_abs_us = corr_matrix_abs.unstack()
        sorted_correlated_features = corr_matrix_abs_us \
            .sort_values(kind="quicksort", ascending=False) \
            .reset_index()

        # Remove comparisons of the same feature
        if remove_self_correlations:
            sorted_correlated_features = sorted_correlated_features[
                (sorted_correlated_features.level_0 != sorted_correlated_features.level_1)
            ]

        # Remove duplicates
        if remove_duplicates:
            sorted_correlated_features = sorted_correlated_features.iloc[:-2:2]

        # Create meaningful names for the columns
        sorted_correlated_features.columns = ['f1', 'f2', 'corr']

        return sorted_correlated_features
    
    def get_correlation_matrix(df:DataFrame, method:str, save_path:str):
        if os.path.exists(save_path) == False:
            corr_matrix = df.corr(method = method, numeric_only = True)
            pickle.dump(corr_matrix, open(save_path, 'wb'))
        else:
            corr_matrix = pickle.load(open(save_path, 'rb'))

        return corr_matrix

    def remove_aggr_function(str_to_check:str) -> str:
        parts = str_to_check.split('_')
        
        if (len(parts) > 2):
            index_to_remove = len(parts) - 2
            
            # remove aggregation function
            if (parts[index_to_remove] in ['min', 'std', 'max', 'mea', 'td']):
                parts.remove(parts[index_to_remove])
                
            result = '_'.join(parts)
            return result
        else:
            return str_to_check    

        
    def get_heatmap_matrix(corr_matrix:DataFrame):
        heatmap_matrix = pd.DataFrame(corr_matrix['target'].abs())
        heatmap_matrix = heatmap_matrix.sort_values(by='target', ascending=False)
        heatmap_matrix = heatmap_matrix.drop(index=['target'])           
        return heatmap_matrix
    
    # index - column name
    # target - value
    def plot_heatmap(heatmap_matrix:DataFrame):
        plt.figure(figsize=(40, 120))
        heatmap = sns.heatmap(heatmap_matrix, vmin=-1, vmax=1, annot=True, cmap='BrBG')
        heatmap.set_title('Features Correlating with Churn Rate', fontdict={'fontsize':18}, pad=16);
        return heatmap_matrix
    
    def get_important_features(heatmap_matrix:DataFrame, use_groupping = False, num_of_features:int = -1):
        df_features = heatmap_matrix.reset_index()
        df_features = df_features.rename(columns = {'index':'feature'})
        
        # apply aggregation function for further groupping
        df_features['feature_group'] = df_features['feature'].apply(FeatureHelper.remove_aggr_function)
        df_features = df_features[['feature', 'feature_group', 'target']]
        sorted_features = df_features.sort_values(by=['feature_group', 'target'], ascending = [False, False])
        
        # take first item from the group
        if (use_groupping == True):
            important_features = sorted_features.groupby('feature_group').first()
        else:
            important_features = sorted_features

        # order by target
        important_features = important_features.sort_values(by='target', ascending=False)
           
        # take N first rows
        if (num_of_features != -1):
            important_features = important_features.head(num_of_features)
        
        # optimize for heatmap
        important_features = important_features.reset_index()
        important_features = important_features[['feature', 'target']]
        important_features.index = important_features['feature']
        important_features.index.name = None
        important_features = important_features[['target']]
        important_features = important_features[important_features['target'] > 0]
        
        return important_features
    
    def get_important_features_tuples(heatmap_matrix:DataFrame, num_of_features:int = -1):
        important_features = FeatureHelper.get_important_features(heatmap_matrix, num_of_features)
        
        if (num_of_features == -1):
            num_of_features = len(important_features)
        
        important_features_tuples = list(zip(important_features.index, 
                                             important_features.target, 
                                             list(range(0, num_of_features))))
        
        return important_features_tuples

In [12]:
class FeatureSelector(abc.ABC):

    @abc.abstractmethod
    def get_heatmap(self) -> pd.DataFrame:
        pass
    
    @abc.abstractmethod
    def plot_heatmap(self):
        pass

    @abc.abstractmethod
    def get_important_features(self) -> pd.DataFrame:
        pass
    
    @abc.abstractmethod
    # 1 - feature name, 2 - target, 3 - sorted number
    def get_important_features_tuples(self) -> List[tuple[str, float, int]]:
        pass

In [13]:
class CorrelationSelector(FeatureSelector):
    
    def __init__(self, data:pd.DataFrame, corr_method:str, num_of_features:int):
        self.data = data
        self.corr_method = corr_method
        self.file_prefix = corr_method
        self.num_of_features = num_of_features
    
    def get_heatmap(self) -> pd.DataFrame:
        self.save_path = f'{self.file_prefix}_{_СORRELATION_MATRIX_PATH_}'
        self.corr_m = FeatureHelper.get_correlation_matrix(self.data, self.corr_method, self.save_path)
        self.heatmap_m = FeatureHelper.get_heatmap_matrix(self.corr_m)
        return self.heatmap_m 
    
    def get_important_features(self) -> pd.DataFrame:
        return FeatureHelper.get_important_features(self.heatmap_m, self.num_of_features)
    
    def plot_heatmap(self):
        FeatureHelper.plot_heatmap(self.get_important_features())
    
    def get_non_correlated_features(self, barrier_coef:float, do_log:bool) -> List[str]:
        
        important_tuples = FeatureHelper.get_important_features_tuples(self.heatmap_m, self.num_of_features)
        # f1, f2, corr
        features_corr = FeatureHelper.get_feature_correlation_df(self.corr_m)
        
        already_processed = set()
        all_features = [t[0] for t in important_tuples]

        for f in all_features:
            # get correlated features
            correlated = list(features_corr[(features_corr['f1']==f) & (features_corr['corr'] > barrier_coef)]['f2'])

            # if highly correlated features exist
            if (len(correlated)>0):

                for to_remove in correlated:
                    if (to_remove not in already_processed):
                        if (to_remove in all_features):
                            all_features.remove(to_remove)
                            if do_log: print(f'Removing: {to_remove} for {f}')            

        # remember initial feature
        already_processed.add(f)
        
        # return non-correlated features
        return all_features
    
    def get_important_noncorrelated_features_tuples(self, mutual_correlation:float, feature_importance:float):
        
        final = []
        important = self.get_important_features_tuples()
        non_correlated = self.get_non_correlated_features(mutual_correlation, False)
        
        for i in important:
            if (i[0] in non_correlated and i[1] > feature_importance):
                final.append(i)

        return final
                        
    def get_important_features_tuples(self) ->  List[tuple[str, float, int]]:
        return FeatureHelper.get_important_features_tuples(self.heatmap_m, self.num_of_features)
    
    def __str__(self):
        return type(self).__name__ + '_' + self.corr_method

In [14]:
# https://nitin9809.medium.com/lightgbm-binary-classification-multi-class-classification-regression-using-python-4f22032b36a2
# https://www.analyticsvidhya.com/blog/2020/10/feature-selection-techniques-in-machine-learning/
class LGBMSelector(FeatureSelector):
    
    def __init__(self, data:pd.DataFrame, num_of_features:int):
        self.data = data
        self.num_of_features = num_of_features
        self.train_model()
    
    def train_model(self):
        test, train = split_test_train(self.data)

        self.y_train = train['target'].round(0).astype(int)
        self.y_test = test['target'].round(0).astype(int)

        train = train.drop(['target'], axis=1)
        test = test.drop(['target'], axis=1)

        # save regressor
        self.regressor = lg.LGBMClassifier()
        self.regressor.fit(train, self.y_train)
        predicted = self.regressor.predict(test)
        
        # save predicted data
        self.predicted = predicted
    
    def get_heatmap(self) -> pd.DataFrame:
        df_feature_importance = pd.DataFrame(list(zip(list(self.regressor.feature_importances_), list(self.data.columns))))
        df_feature_importance = df_feature_importance.set_axis(['target', 'feature'], axis=1)
        df_feature_importance = df_feature_importance.sort_values(by=['target'], ascending=False)
        df_feature_importance = df_feature_importance.set_index('feature')
        df_feature_importance.index.name = None
        self.heatmap_m = df_feature_importance
        return self.heatmap_m
    
    def plot_heatmap(self):
        FeatureHelper.plot_heatmap(self.get_important_features())

    def get_important_features(self) -> pd.DataFrame:
        return FeatureHelper.get_important_features(self.heatmap_m, self.num_of_features)
    
    def get_important_features_tuples(self) -> List[tuple[str, float, int]]:
        features_tuples = FeatureHelper.get_important_features_tuples(self.heatmap_m, self.num_of_features)
        return features_tuples
        
    def get_ROCAUC(self):
        return Metrics.roc_auc(self.y_test, self.predicted)

    def get_confusion_matrix(self):
        return Metrics.confusion_matrix(self.y_test, self.predicted)
    
    def get_classification_report(self):
        return Metrics.classification_report(self.y_test, self.predicted)
    
    def __str__(self):
        return type(self).__name__

In [15]:
# https://neptune.ai/blog/lightgbm-parameters-guide
class EnhancedLGBMSelector(LGBMSelector):
    
    def train_model(self):        
        
        test, train = split_test_train(df_combine)

        self.y_train = train['target'].round(0).astype(int)
        self.y_test = test['target'].round(0).astype(int)

        self.x_train = train.drop(['target'], axis=1)
        self.x_test = test.drop(['target'], axis=1)

        # Specifying the parameter
        d_train = lgb.Dataset(x_train, label=y_train)
        d_test = lgb.Dataset(x_test, label=y_test, reference=d_train)

        # load model from disk
        if os.path.exists(_СHURN_PATH_) == True: 
            with open('LGBM_model_800_features.pcl', 'rb') as file:
                self.regressor = pickle.load(file)
        else:
            params={}
            params['boosting_type']='gbdt' 
            params['objective']='binary' 
            params['metric']='auc' 
            params['verbosity'] = 0

            #train the model 
            self.regressor=lgb.train(params = params, 
                          train_set = d_train,
                          valid_sets = d_test, 
                          num_boost_round = 20, 
                          callbacks= [lgb.early_stopping(stopping_rounds=10)])
        
        #prediction on the test set
        self.predicted = self.regressor.predict(x_test)


In [16]:
class TransformPipe:
    
    def __init__(self, funcs, **kwargs):
        self.funcs = funcs
        self.kwargs = kwargs
    
    def transform(self, df:DataFrame) -> DataFrame:
        for f in self.funcs:
            df = f(df, **self.kwargs)
            
        return df

## Load Data

In [17]:
train_df, train_num, train_dpi, test_df, test_num, test_dpi = load_churn_reduced()
df_combine, df_combine_num, df_combine_dpi = combine_data(train_df, train_num, train_dpi, test_df, test_num, test_dpi)

### Feature importance

In [18]:
# https://towardsdatascience.com/deep-dive-on-ml-techniques-for-feature-selection-in-python-part-2-c258f8a2ac43
# https://www.kaggle.com/code/gomes555/tps-jun2021-feature-selection-lightgbm-tuner

result = {}
seleсtors = [ CorrelationSelector(df_combine, 'pearson', -1),
              CorrelationSelector(df_combine, 'spearman', -1) ]

for selector in seleсtors:
    heatmap = selector.get_heatmap()
    features = selector.get_important_features()
    features_tuples = selector.get_important_features_tuples()
    result[selector.__str__()] = selector

In [19]:
# normal = LGBMSelector(df_combine, -1)

In [20]:
# enhanced = EnhancedLGBMSelector(df_combine, -1)

In [21]:
# normal.get_ROCAUC(), enhanced.get_ROCAUC()

In [22]:
test, train = split_test_train(df_combine)

y_train = train['target'].round(0).astype(int)
y_test = test['target'].round(0).astype(int)

x_train = train.drop(['target'], axis=1)
x_test = test.drop(['target'], axis=1)

#Specifying the parameter
d_train = lgb.Dataset(x_train, label=y_train)
d_test = lgb.Dataset(x_test, label=y_test, reference=d_train)

params={}
params['boosting_type']='gbdt' #GradientBoostingDecisionTree
params['objective']='binary' #Binary target feature
params['metric']='auc' #metric for binary classification
#params['num_boost_round']=100
#params['early_stopping_rounds']=30
#params['max_depth']=100
#params['is_unbalance']=True
params['verbosity'] = 2

#train the model 
clf=lgb.train(params = params, 
              train_set = d_train,
              valid_sets = d_test, 
              num_boost_round = 20, 
              callbacks= [lgb.early_stopping(stopping_rounds=10)])

#prediction on the test set
predicted=clf.predict(x_test)


[I 2023-07-01 00:28:47,743] A new study created in memory with name: no-name-88c2c11e-f6e1-4583-839a-02341f78fa75
feature_fraction, val_score: -inf:   0%|          | 0/7 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.355101 seconds, init for row-wise cost 1.666131 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 8
Training until validation scores don't improve for 10 rounds
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 9
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 8
[LightGBM] [Debug] Trained a

feature_fraction, val_score: 0.883539:  14%|#4        | 1/7 [00:31<03:08, 31.39s/it][I 2023-07-01 00:29:19,177] Trial 0 finished with value: 0.883539274986546 and parameters: {'feature_fraction': 0.7}. Best is trial 0 with value: 0.883539274986546.
feature_fraction, val_score: 0.883539:  14%|#4        | 1/7 [00:31<03:08, 31.39s/it]

[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 10
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.883539
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.291937 seconds, init for row-wise cost 1.293693 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 8
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] 

feature_fraction, val_score: 0.886879:  29%|##8       | 2/7 [01:00<02:29, 29.87s/it][I 2023-07-01 00:29:48,004] Trial 1 finished with value: 0.8868785101089008 and parameters: {'feature_fraction': 0.8}. Best is trial 1 with value: 0.8868785101089008.
feature_fraction, val_score: 0.886879:  29%|##8       | 2/7 [01:00<02:29, 29.87s/it]

[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 11
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.886879
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.323685 seconds, init for row-wise cost 1.411335 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 8
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 6
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 9
[LightGBM] 

feature_fraction, val_score: 0.886879:  43%|####2     | 3/7 [01:32<02:03, 30.77s/it][I 2023-07-01 00:30:19,854] Trial 2 finished with value: 0.8868785101089008 and parameters: {'feature_fraction': 1.0}. Best is trial 1 with value: 0.8868785101089008.
feature_fraction, val_score: 0.886879:  43%|####2     | 3/7 [01:32<02:03, 30.77s/it]

[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 10
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.886879
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.274004 seconds, init for row-wise cost 1.373030 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 9
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 8
[LightGBM] 

feature_fraction, val_score: 0.886879:  57%|#####7    | 4/7 [02:03<01:33, 31.13s/it][I 2023-07-01 00:30:51,527] Trial 3 finished with value: 0.8868785101089008 and parameters: {'feature_fraction': 0.4}. Best is trial 1 with value: 0.8868785101089008.
feature_fraction, val_score: 0.886879:  57%|#####7    | 4/7 [02:03<01:33, 31.13s/it]

Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.886879
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.335733 seconds, init for row-wise cost 1.439177 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 8
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 9
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 9
[LightGBM] [

feature_fraction, val_score: 0.886879:  71%|#######1  | 5/7 [02:30<00:58, 29.41s/it][I 2023-07-01 00:31:17,846] Trial 4 finished with value: 0.8868785101089008 and parameters: {'feature_fraction': 0.6}. Best is trial 1 with value: 0.8868785101089008.
feature_fraction, val_score: 0.886879:  71%|#######1  | 5/7 [02:30<00:58, 29.41s/it]

[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 9
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.886879
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.274508 seconds, init for row-wise cost 1.244605 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 9
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 9
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 9
[LightGBM] [

feature_fraction, val_score: 0.886879:  86%|########5 | 6/7 [02:55<00:28, 28.22s/it][I 2023-07-01 00:31:43,816] Trial 5 finished with value: 0.8868785101089008 and parameters: {'feature_fraction': 0.8999999999999999}. Best is trial 1 with value: 0.8868785101089008.
feature_fraction, val_score: 0.886879:  86%|########5 | 6/7 [02:56<00:28, 28.22s/it]

[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 8
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.886879
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.299129 seconds, init for row-wise cost 1.360175 seconds
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Debug] Using Sparse Multi-Val Bin
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 8
[LightGBM] [Debug] Trained a tre

feature_fraction, val_score: 0.886879: 100%|##########| 7/7 [03:26<00:00, 28.85s/it][I 2023-07-01 00:32:13,954] Trial 6 finished with value: 0.8868785101089008 and parameters: {'feature_fraction': 0.5}. Best is trial 1 with value: 0.8868785101089008.
feature_fraction, val_score: 0.886879: 100%|##########| 7/7 [03:26<00:00, 29.46s/it]

[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 11
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.886879



num_leaves, val_score: 0.886879:   0%|          | 0/20 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.320738 seconds, init for row-wise cost 1.276004 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 41 and depth = 9
[LightGBM] [Debug] Trained a tree with leaves = 41 and depth = 8
[LightGBM] [Debug] Trained a tree with leaves = 41 and depth = 9
[LightGBM] [Debug] Trained a tree with leaves = 41 and depth = 9
[LightGBM] [Debug] Trained a tree with leaves = 41 and depth = 9
[LightGBM] [Debug] Train

num_leaves, val_score: 0.886879:   5%|5         | 1/20 [00:27<08:34, 27.09s/it][I 2023-07-01 00:32:41,142] Trial 7 finished with value: 0.8868785101089008 and parameters: {'num_leaves': 41}. Best is trial 7 with value: 0.8868785101089008.
num_leaves, val_score: 0.886879:   5%|5         | 1/20 [00:27<08:34, 27.09s/it]

[LightGBM] [Debug] Trained a tree with leaves = 41 and depth = 10
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.886879
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.187930 seconds, init for row-wise cost 1.214906 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 85 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 85 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 85 and depth = 14
[LightGB

num_leaves, val_score: 0.887817:  10%|#         | 2/20 [00:55<08:16, 27.60s/it][I 2023-07-01 00:33:09,051] Trial 8 finished with value: 0.8878170342950084 and parameters: {'num_leaves': 85}. Best is trial 8 with value: 0.8878170342950084.
num_leaves, val_score: 0.887817:  10%|#         | 2/20 [00:55<08:16, 27.60s/it]

[LightGBM] [Debug] Trained a tree with leaves = 85 and depth = 14
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.887817
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.265535 seconds, init for row-wise cost 1.290246 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 240 and depth = 16
[LightGBM] [Debug] Trained a tree with leaves = 240 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 240 and depth = 16
[Ligh

num_leaves, val_score: 0.887817:  15%|#5        | 3/20 [01:28<08:34, 30.24s/it][I 2023-07-01 00:33:42,480] Trial 9 finished with value: 0.8878170342950084 and parameters: {'num_leaves': 240}. Best is trial 8 with value: 0.8878170342950084.


[LightGBM] [Debug] Trained a tree with leaves = 240 and depth = 16
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.887817


num_leaves, val_score: 0.887817:  15%|#5        | 3/20 [01:28<08:34, 30.24s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.254452 seconds, init for row-wise cost 1.320406 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 95 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 95 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 95 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 95 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 95 and depth = 13
[LightGBM] [Debug] 

num_leaves, val_score: 0.887817:  20%|##        | 4/20 [01:58<07:59, 29.98s/it][I 2023-07-01 00:34:12,085] Trial 10 finished with value: 0.8878170342950084 and parameters: {'num_leaves': 95}. Best is trial 8 with value: 0.8878170342950084.
num_leaves, val_score: 0.887817:  20%|##        | 4/20 [01:58<07:59, 29.98s/it]

[LightGBM] [Debug] Trained a tree with leaves = 95 and depth = 14
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.887817
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.318274 seconds, init for row-wise cost 1.268384 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 254 and depth = 16
[LightGBM] [Debug] Trained a tree with leaves = 254 and depth = 15
[LightGBM] [Debug] Trained a tree with leaves = 254 and depth = 15
[Ligh

num_leaves, val_score: 0.887817:  25%|##5       | 5/20 [02:31<07:49, 31.29s/it][I 2023-07-01 00:34:45,630] Trial 11 finished with value: 0.8878170342950084 and parameters: {'num_leaves': 254}. Best is trial 8 with value: 0.8878170342950084.


[LightGBM] [Debug] Trained a tree with leaves = 254 and depth = 20
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.887817


num_leaves, val_score: 0.887817:  25%|##5       | 5/20 [02:31<07:49, 31.29s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.270297 seconds, init for row-wise cost 1.276026 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 225 and depth = 16
[LightGBM] [Debug] Trained a tree with leaves = 225 and depth = 18
[LightGBM] [Debug] Trained a tree with leaves = 225 and depth = 15
[LightGBM] [Debug] Trained a tree with leaves = 225 and depth = 18
[LightGBM] [Debug] Trained a tree with leaves = 225 and depth = 16
[LightGBM] [De

num_leaves, val_score: 0.887817:  30%|###       | 6/20 [03:07<07:37, 32.70s/it]

[LightGBM] [Debug] Trained a tree with leaves = 225 and depth = 16
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.887817


[I 2023-07-01 00:35:21,143] Trial 12 finished with value: 0.8878170342950084 and parameters: {'num_leaves': 225}. Best is trial 8 with value: 0.8878170342950084.
num_leaves, val_score: 0.887817:  30%|###       | 6/20 [03:07<07:37, 32.70s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.264841 seconds, init for row-wise cost 1.285757 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 148 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 148 and depth = 14
[LightGBM] [Debug] Trained a tree with leaves = 148 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 148 and depth = 17
[LightGBM] [Debug] Trained a tree with leaves = 148 and depth = 16
[LightGBM] [De

num_leaves, val_score: 0.887817:  35%|###5      | 7/20 [03:39<07:02, 32.50s/it][I 2023-07-01 00:35:53,218] Trial 13 finished with value: 0.8878170342950084 and parameters: {'num_leaves': 148}. Best is trial 8 with value: 0.8878170342950084.
num_leaves, val_score: 0.887817:  35%|###5      | 7/20 [03:39<07:02, 32.50s/it]

[LightGBM] [Debug] Trained a tree with leaves = 148 and depth = 15
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.887817
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.295221 seconds, init for row-wise cost 1.258665 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 138 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 138 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 138 and depth = 13
[Lig

num_leaves, val_score: 0.887817:  40%|####      | 8/20 [04:10<06:24, 32.01s/it][I 2023-07-01 00:36:24,192] Trial 14 finished with value: 0.8878170342950084 and parameters: {'num_leaves': 138}. Best is trial 8 with value: 0.8878170342950084.
num_leaves, val_score: 0.887817:  40%|####      | 8/20 [04:10<06:24, 32.01s/it]

[LightGBM] [Debug] Trained a tree with leaves = 138 and depth = 18
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.887817
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.193132 seconds, init for row-wise cost 1.153360 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 9 and depth = 4
[LightGBM] [Debug] Trained a tree with leaves = 9 and depth = 4
[LightGBM] [Debug] Trained a tree with leaves = 9 and depth = 5
[LightGBM] [D

num_leaves, val_score: 0.887817:  45%|####5     | 9/20 [04:33<05:23, 29.39s/it][I 2023-07-01 00:36:47,748] Trial 15 finished with value: 0.8878170342950084 and parameters: {'num_leaves': 9}. Best is trial 8 with value: 0.8878170342950084.
num_leaves, val_score: 0.887817:  45%|####5     | 9/20 [04:33<05:23, 29.39s/it]

[LightGBM] [Debug] Trained a tree with leaves = 9 and depth = 7
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.887817
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.304236 seconds, init for row-wise cost 1.293321 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 192 and depth = 15
[LightGBM] [Debug] Trained a tree with leaves = 192 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 192 and depth = 14
[LightG

num_leaves, val_score: 0.887817:  50%|#####     | 10/20 [05:06<05:04, 30.49s/it]

[LightGBM] [Debug] Trained a tree with leaves = 192 and depth = 15
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.887817


[I 2023-07-01 00:37:20,766] Trial 16 finished with value: 0.8878170342950084 and parameters: {'num_leaves': 192}. Best is trial 8 with value: 0.8878170342950084.
num_leaves, val_score: 0.887817:  50%|#####     | 10/20 [05:06<05:04, 30.49s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.308798 seconds, init for row-wise cost 1.400015 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 91 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 91 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 91 and depth = 10
[LightGBM] [Debug] Trained a tree with leaves = 91 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 91 and depth = 13
[LightGBM] [Debug] 

num_leaves, val_score: 0.887817:  55%|#####5    | 11/20 [05:36<04:33, 30.36s/it][I 2023-07-01 00:37:50,869] Trial 17 finished with value: 0.8878170342950084 and parameters: {'num_leaves': 91}. Best is trial 8 with value: 0.8878170342950084.


[LightGBM] [Debug] Trained a tree with leaves = 91 and depth = 13
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.887817


num_leaves, val_score: 0.887817:  55%|#####5    | 11/20 [05:36<04:33, 30.36s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.259618 seconds, init for row-wise cost 1.249554 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 70 and depth = 10
[LightGBM] [Debug] Trained a tree with leaves = 70 and depth = 10
[LightGBM] [Debug] Trained a tree with leaves = 70 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 70 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 70 and depth = 13
[LightGBM] [Debug] 

num_leaves, val_score: 0.887817:  60%|######    | 12/20 [06:05<03:59, 29.98s/it][I 2023-07-01 00:38:19,891] Trial 18 finished with value: 0.8878170342950084 and parameters: {'num_leaves': 70}. Best is trial 8 with value: 0.8878170342950084.
num_leaves, val_score: 0.887817:  60%|######    | 12/20 [06:05<03:59, 29.98s/it]

[LightGBM] [Debug] Trained a tree with leaves = 70 and depth = 10
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.887817
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.293015 seconds, init for row-wise cost 1.375646 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 176 and depth = 14
[LightGBM] [Debug] Trained a tree with leaves = 176 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 176 and depth = 13
[Ligh

num_leaves, val_score: 0.887817:  65%|######5   | 13/20 [06:37<03:33, 30.49s/it][I 2023-07-01 00:38:51,578] Trial 19 finished with value: 0.8878170342950084 and parameters: {'num_leaves': 176}. Best is trial 8 with value: 0.8878170342950084.


[LightGBM] [Debug] Trained a tree with leaves = 176 and depth = 19
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.887817


num_leaves, val_score: 0.887817:  65%|######5   | 13/20 [06:37<03:33, 30.49s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.250577 seconds, init for row-wise cost 1.396682 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [De

num_leaves, val_score: 0.888636:  70%|#######   | 14/20 [07:07<03:01, 30.32s/it][I 2023-07-01 00:39:21,515] Trial 20 finished with value: 0.8886359227161271 and parameters: {'num_leaves': 115}. Best is trial 20 with value: 0.8886359227161271.
num_leaves, val_score: 0.888636:  70%|#######   | 14/20 [07:07<03:01, 30.32s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 18
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.298659 seconds, init for row-wise cost 1.264371 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 113 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 113 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 113 and depth = 13
[Lig

num_leaves, val_score: 0.888636:  75%|#######5  | 15/20 [07:37<02:31, 30.22s/it][I 2023-07-01 00:39:51,470] Trial 21 finished with value: 0.8886359227161271 and parameters: {'num_leaves': 113}. Best is trial 20 with value: 0.8886359227161271.
num_leaves, val_score: 0.888636:  75%|#######5  | 15/20 [07:37<02:31, 30.22s/it]

[LightGBM] [Debug] Trained a tree with leaves = 113 and depth = 15
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.183241 seconds, init for row-wise cost 1.184992 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 107 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 107 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 107 and depth = 14
[Lig

num_leaves, val_score: 0.888636:  80%|########  | 16/20 [08:06<01:59, 29.82s/it]

[LightGBM] [Debug] Trained a tree with leaves = 107 and depth = 13
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636


[I 2023-07-01 00:40:20,445] Trial 22 finished with value: 0.8886359227161271 and parameters: {'num_leaves': 107}. Best is trial 20 with value: 0.8886359227161271.
num_leaves, val_score: 0.888636:  80%|########  | 16/20 [08:06<01:59, 29.82s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.270784 seconds, init for row-wise cost 1.202278 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 121 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 121 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 121 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 121 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 121 and depth = 15
[LightGBM] [De

num_leaves, val_score: 0.888636:  85%|########5 | 17/20 [08:36<01:29, 29.92s/it][I 2023-07-01 00:40:50,557] Trial 23 finished with value: 0.8886359227161271 and parameters: {'num_leaves': 121}. Best is trial 20 with value: 0.8886359227161271.


[LightGBM] [Debug] Trained a tree with leaves = 121 and depth = 14
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636


num_leaves, val_score: 0.888636:  85%|########5 | 17/20 [08:36<01:29, 29.92s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.293808 seconds, init for row-wise cost 1.221756 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 119 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 119 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 119 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 119 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 119 and depth = 16
[LightGBM] [De

num_leaves, val_score: 0.888636:  90%|######### | 18/20 [09:07<01:00, 30.20s/it]

[LightGBM] [Debug] Trained a tree with leaves = 119 and depth = 13
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636


[I 2023-07-01 00:41:21,452] Trial 24 finished with value: 0.8886359227161271 and parameters: {'num_leaves': 119}. Best is trial 20 with value: 0.8886359227161271.
num_leaves, val_score: 0.888636:  90%|######### | 18/20 [09:07<01:00, 30.20s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.252499 seconds, init for row-wise cost 1.210295 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 165 and depth = 14
[LightGBM] [Debug] Trained a tree with leaves = 165 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 165 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 165 and depth = 16
[LightGBM] [Debug] Trained a tree with leaves = 165 and depth = 16
[LightGBM] [De

num_leaves, val_score: 0.888636:  95%|#########5| 19/20 [09:38<00:30, 30.49s/it][I 2023-07-01 00:41:52,497] Trial 25 finished with value: 0.8886359227161271 and parameters: {'num_leaves': 165}. Best is trial 20 with value: 0.8886359227161271.
num_leaves, val_score: 0.888636:  95%|#########5| 19/20 [09:38<00:30, 30.49s/it]

[LightGBM] [Debug] Trained a tree with leaves = 165 and depth = 14
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.298884 seconds, init for row-wise cost 1.342315 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 57 and depth = 10
[LightGBM] [Debug] Trained a tree with leaves = 57 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 57 and depth = 11
[LightG

num_leaves, val_score: 0.888636: 100%|##########| 20/20 [10:06<00:00, 29.68s/it][I 2023-07-01 00:42:20,334] Trial 26 finished with value: 0.8886359227161271 and parameters: {'num_leaves': 57}. Best is trial 20 with value: 0.8886359227161271.
num_leaves, val_score: 0.888636: 100%|##########| 20/20 [10:06<00:00, 30.32s/it]


[LightGBM] [Debug] Trained a tree with leaves = 57 and depth = 10
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636


bagging, val_score: 0.888636:   0%|          | 0/10 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.283088 seconds, init for row-wise cost 1.285526 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Re-bagging, using 86408 data to train
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Re-ba

bagging, val_score: 0.888636:  10%|#         | 1/10 [00:28<04:12, 28.02s/it][I 2023-07-01 00:42:48,490] Trial 27 finished with value: 0.8886359227161271 and parameters: {'bagging_fraction': 0.5744836697466591, 'bagging_freq': 4}. Best is trial 27 with value: 0.8886359227161271.


[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636


bagging, val_score: 0.888636:  10%|#         | 1/10 [00:28<04:12, 28.02s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.229269 seconds, init for row-wise cost 1.190130 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Re-bagging, using 138704 data to train
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 10
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 15
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trai

bagging, val_score: 0.888636:  20%|##        | 2/10 [00:57<03:49, 28.66s/it][I 2023-07-01 00:43:17,531] Trial 28 finished with value: 0.8886359227161271 and parameters: {'bagging_fraction': 0.9249011767937809, 'bagging_freq': 7}. Best is trial 27 with value: 0.8886359227161271.
bagging, val_score: 0.888636:  20%|##        | 2/10 [00:57<03:49, 28.66s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.232074 seconds, init for row-wise cost 0.942917 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Re-bagging, using 60898 data to train
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Re-bagging, using 61085 data to train
[LightGBM] [Debug] Train

bagging, val_score: 0.888636:  30%|###       | 3/10 [01:24<03:15, 27.93s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636


[I 2023-07-01 00:43:44,678] Trial 29 finished with value: 0.8886359227161271 and parameters: {'bagging_fraction': 0.40598120770555474, 'bagging_freq': 1}. Best is trial 27 with value: 0.8886359227161271.
bagging, val_score: 0.888636:  30%|###       | 3/10 [01:24<03:15, 27.93s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.293514 seconds, init for row-wise cost 1.302760 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Re-bagging, using 147873 data to train
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trai

bagging, val_score: 0.888636:  40%|####      | 4/10 [01:54<02:52, 28.80s/it][I 2023-07-01 00:44:14,788] Trial 30 finished with value: 0.8886359227161271 and parameters: {'bagging_fraction': 0.9854164725283208, 'bagging_freq': 7}. Best is trial 27 with value: 0.8886359227161271.


[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636


bagging, val_score: 0.888636:  40%|####      | 4/10 [01:54<02:52, 28.80s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.291100 seconds, init for row-wise cost 1.205591 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Re-bagging, using 110876 data to train
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Re-bagging, using 110834 data to train
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Re-bagging, using 110852 data to train
[LightGBM] [Debug] Trained a tree with le

bagging, val_score: 0.888636:  50%|#####     | 5/10 [02:24<02:25, 29.20s/it][I 2023-07-01 00:44:44,605] Trial 31 finished with value: 0.8886359227161271 and parameters: {'bagging_fraction': 0.7383935125389781, 'bagging_freq': 1}. Best is trial 27 with value: 0.8886359227161271.
bagging, val_score: 0.888636:  50%|#####     | 5/10 [02:24<02:25, 29.20s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.332021 seconds, init for row-wise cost 1.554281 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Re-bagging, using 112387 data to train
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [D

bagging, val_score: 0.888636:  60%|######    | 6/10 [02:59<02:05, 31.32s/it][I 2023-07-01 00:45:20,050] Trial 32 finished with value: 0.8886359227161271 and parameters: {'bagging_fraction': 0.7485109033821102, 'bagging_freq': 4}. Best is trial 27 with value: 0.8886359227161271.
bagging, val_score: 0.888636:  60%|######    | 6/10 [02:59<02:05, 31.32s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 18
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.242562 seconds, init for row-wise cost 1.224846 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Re-bagging, using 65520 data to train
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [De

bagging, val_score: 0.888636:  70%|#######   | 7/10 [03:27<01:30, 30.29s/it][I 2023-07-01 00:45:48,267] Trial 33 finished with value: 0.8886359227161271 and parameters: {'bagging_fraction': 0.4362958755853858, 'bagging_freq': 5}. Best is trial 27 with value: 0.8886359227161271.
bagging, val_score: 0.888636:  70%|#######   | 7/10 [03:27<01:30, 30.29s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.408979 seconds, init for row-wise cost 1.260893 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Re-bagging, using 124740 data to train
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [D

bagging, val_score: 0.888636:  80%|########  | 8/10 [03:57<01:00, 30.06s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636


[I 2023-07-01 00:46:17,914] Trial 34 finished with value: 0.8886359227161271 and parameters: {'bagging_fraction': 0.8307153689091664, 'bagging_freq': 3}. Best is trial 27 with value: 0.8886359227161271.
bagging, val_score: 0.888636:  80%|########  | 8/10 [03:57<01:00, 30.06s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.316378 seconds, init for row-wise cost 1.295765 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Re-bagging, using 88672 data to train
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [Debug] Re-bagging, using 88558 data to train
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 15
[LightGBM] [Debug] Trained a tree 

bagging, val_score: 0.888636:  90%|######### | 9/10 [04:24<00:29, 29.29s/it][I 2023-07-01 00:46:45,389] Trial 35 finished with value: 0.8886359227161271 and parameters: {'bagging_fraction': 0.5896355033208046, 'bagging_freq': 2}. Best is trial 27 with value: 0.8886359227161271.
bagging, val_score: 0.888636:  90%|######### | 9/10 [04:25<00:29, 29.29s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.248286 seconds, init for row-wise cost 1.217235 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Re-bagging, using 128428 data to train
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [D

bagging, val_score: 0.888636: 100%|##########| 10/10 [04:53<00:00, 29.12s/it][I 2023-07-01 00:47:14,190] Trial 36 finished with value: 0.8886359227161271 and parameters: {'bagging_fraction': 0.8552728357052677, 'bagging_freq': 6}. Best is trial 27 with value: 0.8886359227161271.
bagging, val_score: 0.888636: 100%|##########| 10/10 [04:53<00:00, 29.39s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636



feature_fraction_stage2, val_score: 0.888636:   0%|          | 0/6 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.281887 seconds, init for row-wise cost 1.264199 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [De

feature_fraction_stage2, val_score: 0.888636:  17%|#6        | 1/6 [00:30<02:30, 30.05s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636


[I 2023-07-01 00:47:44,406] Trial 37 finished with value: 0.8886359227161271 and parameters: {'feature_fraction': 0.7200000000000001}. Best is trial 37 with value: 0.8886359227161271.
feature_fraction_stage2, val_score: 0.888636:  17%|#6        | 1/6 [00:30<02:30, 30.05s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.272871 seconds, init for row-wise cost 1.284446 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 15
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [De

feature_fraction_stage2, val_score: 0.888636:  33%|###3      | 2/6 [01:00<02:00, 30.03s/it][I 2023-07-01 00:48:14,311] Trial 38 finished with value: 0.8886359227161271 and parameters: {'feature_fraction': 0.8160000000000001}. Best is trial 37 with value: 0.8886359227161271.
feature_fraction_stage2, val_score: 0.888636:  33%|###3      | 2/6 [01:00<02:00, 30.03s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.274545 seconds, init for row-wise cost 1.304708 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[Lig

feature_fraction_stage2, val_score: 0.888636:  50%|#####     | 3/6 [01:29<01:28, 29.60s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636


[I 2023-07-01 00:48:43,518] Trial 39 finished with value: 0.8886359227161271 and parameters: {'feature_fraction': 0.7520000000000001}. Best is trial 37 with value: 0.8886359227161271.
feature_fraction_stage2, val_score: 0.888636:  50%|#####     | 3/6 [01:29<01:28, 29.60s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.289327 seconds, init for row-wise cost 1.337704 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [De

feature_fraction_stage2, val_score: 0.888636:  67%|######6   | 4/6 [02:00<01:00, 30.48s/it][I 2023-07-01 00:49:15,293] Trial 40 finished with value: 0.8886359227161271 and parameters: {'feature_fraction': 0.88}. Best is trial 37 with value: 0.8886359227161271.
feature_fraction_stage2, val_score: 0.888636:  67%|######6   | 4/6 [02:01<01:00, 30.48s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.247465 seconds, init for row-wise cost 1.428858 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[Lig

feature_fraction_stage2, val_score: 0.888636:  83%|########3 | 5/6 [02:31<00:30, 30.33s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 18
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636


[I 2023-07-01 00:49:45,400] Trial 41 finished with value: 0.8886359227161271 and parameters: {'feature_fraction': 0.8480000000000001}. Best is trial 37 with value: 0.8886359227161271.
feature_fraction_stage2, val_score: 0.888636:  83%|########3 | 5/6 [02:31<00:30, 30.33s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.247025 seconds, init for row-wise cost 1.226112 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [De

feature_fraction_stage2, val_score: 0.888636: 100%|##########| 6/6 [03:00<00:00, 29.96s/it][I 2023-07-01 00:50:14,556] Trial 42 finished with value: 0.8886359227161271 and parameters: {'feature_fraction': 0.784}. Best is trial 37 with value: 0.8886359227161271.
feature_fraction_stage2, val_score: 0.888636: 100%|##########| 6/6 [03:00<00:00, 30.06s/it]


[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.888636


regularization_factors, val_score: 0.888636:   0%|          | 0/20 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.184440 seconds, init for row-wise cost 1.123351 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [De

regularization_factors, val_score: 0.889445:   5%|5         | 1/20 [00:29<09:11, 29.03s/it][I 2023-07-01 00:50:43,700] Trial 43 finished with value: 0.889444647250268 and parameters: {'lambda_l1': 3.3866509133357354, 'lambda_l2': 1.4041138109033385e-07}. Best is trial 43 with value: 0.889444647250268.


[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 17
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.889445


regularization_factors, val_score: 0.889445:   5%|5         | 1/20 [00:29<09:11, 29.03s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.187974 seconds, init for row-wise cost 1.154563 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [De

regularization_factors, val_score: 0.889445:  10%|#         | 2/20 [00:57<08:38, 28.81s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 15
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.889445


[I 2023-07-01 00:51:12,418] Trial 44 finished with value: 0.889444647250268 and parameters: {'lambda_l1': 2.882222545511611, 'lambda_l2': 3.528686937325463e-08}. Best is trial 43 with value: 0.889444647250268.
regularization_factors, val_score: 0.889445:  10%|#         | 2/20 [00:57<08:38, 28.81s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.273501 seconds, init for row-wise cost 1.282317 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [De

regularization_factors, val_score: 0.889445:  15%|#5        | 3/20 [01:26<08:13, 29.03s/it][I 2023-07-01 00:51:41,611] Trial 45 finished with value: 0.889444647250268 and parameters: {'lambda_l1': 6.411603187060055, 'lambda_l2': 2.243083286884319e-08}. Best is trial 43 with value: 0.889444647250268.
regularization_factors, val_score: 0.889445:  15%|#5        | 3/20 [01:27<08:13, 29.03s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 15
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.889445
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.237575 seconds, init for row-wise cost 1.391373 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[Lig

regularization_factors, val_score: 0.889445:  20%|##        | 4/20 [01:55<07:38, 28.63s/it][I 2023-07-01 00:52:09,658] Trial 46 finished with value: 0.889444647250268 and parameters: {'lambda_l1': 7.702828584281146, 'lambda_l2': 2.362838618150639e-08}. Best is trial 43 with value: 0.889444647250268.
regularization_factors, val_score: 0.889445:  20%|##        | 4/20 [01:55<07:38, 28.63s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.889445
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.220299 seconds, init for row-wise cost 1.483555 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[Lig

regularization_factors, val_score: 0.889445:  25%|##5       | 5/20 [02:23<07:06, 28.45s/it][I 2023-07-01 00:52:37,796] Trial 47 finished with value: 0.889444647250268 and parameters: {'lambda_l1': 7.718905853181482, 'lambda_l2': 1.9140216217805077e-08}. Best is trial 43 with value: 0.889444647250268.
regularization_factors, val_score: 0.889445:  25%|##5       | 5/20 [02:23<07:06, 28.45s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.889445
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.202405 seconds, init for row-wise cost 1.074365 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[Lig

regularization_factors, val_score: 0.889445:  30%|###       | 6/20 [02:50<06:34, 28.16s/it][I 2023-07-01 00:53:05,332] Trial 48 finished with value: 0.889444647250268 and parameters: {'lambda_l1': 7.559315145794619, 'lambda_l2': 1.9596843019723708e-08}. Best is trial 43 with value: 0.889444647250268.
regularization_factors, val_score: 0.889445:  30%|###       | 6/20 [02:50<06:34, 28.16s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 15
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.889445
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.234244 seconds, init for row-wise cost 1.277636 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
[Lig

regularization_factors, val_score: 0.889445:  35%|###5      | 7/20 [03:19<06:10, 28.52s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 17
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.889445


[I 2023-07-01 00:53:34,699] Trial 49 finished with value: 0.889444647250268 and parameters: {'lambda_l1': 6.485163623815675, 'lambda_l2': 1.5887660611883522e-08}. Best is trial 43 with value: 0.889444647250268.
regularization_factors, val_score: 0.889445:  35%|###5      | 7/20 [03:20<06:10, 28.52s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.279077 seconds, init for row-wise cost 1.336913 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 15
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [De

regularization_factors, val_score: 0.889445:  40%|####      | 8/20 [03:48<05:43, 28.61s/it][I 2023-07-01 00:54:03,435] Trial 50 finished with value: 0.889444647250268 and parameters: {'lambda_l1': 5.462946038938918, 'lambda_l2': 2.316130320344606e-08}. Best is trial 43 with value: 0.889444647250268.
regularization_factors, val_score: 0.889445:  40%|####      | 8/20 [03:48<05:43, 28.61s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 17
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.889445
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.228539 seconds, init for row-wise cost 1.134941 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 15
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[Lig

regularization_factors, val_score: 0.889445:  45%|####5     | 9/20 [04:16<05:13, 28.47s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 15
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.889445


[I 2023-07-01 00:54:31,674] Trial 51 finished with value: 0.889444647250268 and parameters: {'lambda_l1': 6.70096133648986, 'lambda_l2': 2.1913840252179594e-08}. Best is trial 43 with value: 0.889444647250268.
regularization_factors, val_score: 0.889445:  45%|####5     | 9/20 [04:17<05:13, 28.47s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.191081 seconds, init for row-wise cost 1.229027 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 15
[LightGBM] [De

regularization_factors, val_score: 0.889445:  50%|#####     | 10/20 [04:44<04:43, 28.31s/it][I 2023-07-01 00:54:59,523] Trial 52 finished with value: 0.889444647250268 and parameters: {'lambda_l1': 6.832375401436067, 'lambda_l2': 1.9282702945752386e-08}. Best is trial 43 with value: 0.889444647250268.
regularization_factors, val_score: 0.889445:  50%|#####     | 10/20 [04:44<04:43, 28.31s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 17
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.889445
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.187738 seconds, init for row-wise cost 0.986111 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
[Lig

regularization_factors, val_score: 0.889445:  55%|#####5    | 11/20 [05:13<04:15, 28.39s/it][I 2023-07-01 00:55:28,161] Trial 53 finished with value: 0.889444647250268 and parameters: {'lambda_l1': 6.264039956026412, 'lambda_l2': 3.417792713023881e-08}. Best is trial 43 with value: 0.889444647250268.


[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.889445


regularization_factors, val_score: 0.889445:  55%|#####5    | 11/20 [05:13<04:15, 28.39s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.302501 seconds, init for row-wise cost 1.286868 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [De

regularization_factors, val_score: 0.889445:  60%|######    | 12/20 [05:41<03:47, 28.42s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.889445


[I 2023-07-01 00:55:56,659] Trial 54 finished with value: 0.889444647250268 and parameters: {'lambda_l1': 6.330111805534285, 'lambda_l2': 3.580013626566446e-08}. Best is trial 43 with value: 0.889444647250268.
regularization_factors, val_score: 0.889445:  60%|######    | 12/20 [05:42<03:47, 28.42s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.257988 seconds, init for row-wise cost 1.449387 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 17
[LightGBM] [De

regularization_factors, val_score: 0.889445:  65%|######5   | 13/20 [06:12<03:24, 29.15s/it][I 2023-07-01 00:56:27,402] Trial 55 finished with value: 0.889444647250268 and parameters: {'lambda_l1': 0.06506700299247777, 'lambda_l2': 5.976031752049182e-07}. Best is trial 43 with value: 0.889444647250268.
regularization_factors, val_score: 0.889445:  65%|######5   | 13/20 [06:12<03:24, 29.15s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.889445
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.203711 seconds, init for row-wise cost 1.443112 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[Lig

regularization_factors, val_score: 0.889445:  70%|#######   | 14/20 [06:41<02:54, 29.13s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 18
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.889445


[I 2023-07-01 00:56:56,596] Trial 56 finished with value: 0.889444647250268 and parameters: {'lambda_l1': 2.1102738922533952e-05, 'lambda_l2': 3.032710674212285e-06}. Best is trial 43 with value: 0.889444647250268.
regularization_factors, val_score: 0.889445:  70%|#######   | 14/20 [06:42<02:54, 29.13s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.183924 seconds, init for row-wise cost 0.984314 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 15
[LightGBM] [De

regularization_factors, val_score: 0.889445:  75%|#######5  | 15/20 [07:11<02:26, 29.38s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 15
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.889445


[I 2023-07-01 00:57:26,509] Trial 57 finished with value: 0.889444647250268 and parameters: {'lambda_l1': 0.19518959511578693, 'lambda_l2': 0.0031940404064721606}. Best is trial 43 with value: 0.889444647250268.
regularization_factors, val_score: 0.889445:  75%|#######5  | 15/20 [07:11<02:26, 29.38s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.235206 seconds, init for row-wise cost 1.210777 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [De

regularization_factors, val_score: 0.889445:  80%|########  | 16/20 [07:41<01:58, 29.52s/it][I 2023-07-01 00:57:56,287] Trial 58 finished with value: 0.889444647250268 and parameters: {'lambda_l1': 0.36451490119965246, 'lambda_l2': 5.588126995221761e-07}. Best is trial 43 with value: 0.889444647250268.
regularization_factors, val_score: 0.889445:  80%|########  | 16/20 [07:41<01:58, 29.52s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.889445
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.219400 seconds, init for row-wise cost 1.238735 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 10
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[Lig

regularization_factors, val_score: 0.890067:  85%|########5 | 17/20 [08:11<01:29, 29.72s/it][I 2023-07-01 00:58:26,481] Trial 59 finished with value: 0.8900674850553022 and parameters: {'lambda_l1': 0.30296276025792374, 'lambda_l2': 2.492286902249665}. Best is trial 59 with value: 0.8900674850553022.
regularization_factors, val_score: 0.890067:  85%|########5 | 17/20 [08:11<01:29, 29.72s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.890067
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.246169 seconds, init for row-wise cost 1.263639 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 10
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[Lig

regularization_factors, val_score: 0.890298:  90%|######### | 18/20 [08:42<00:59, 29.88s/it][I 2023-07-01 00:58:56,803] Trial 60 finished with value: 0.8902981172435277 and parameters: {'lambda_l1': 0.2817999473750504, 'lambda_l2': 3.719000841940739}. Best is trial 60 with value: 0.8902981172435277.


[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 17
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.890298


regularization_factors, val_score: 0.890298:  90%|######### | 18/20 [08:42<00:59, 29.88s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.266351 seconds, init for row-wise cost 1.322136 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 10
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [De

regularization_factors, val_score: 0.890298:  95%|#########5| 19/20 [09:13<00:30, 30.23s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 15
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.890298


[I 2023-07-01 00:59:27,891] Trial 61 finished with value: 0.8902981172435277 and parameters: {'lambda_l1': 0.3082613206369492, 'lambda_l2': 2.2942897295448046}. Best is trial 60 with value: 0.8902981172435277.
regularization_factors, val_score: 0.890298:  95%|#########5| 19/20 [09:13<00:30, 30.23s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.284138 seconds, init for row-wise cost 1.257302 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [De

regularization_factors, val_score: 0.890298: 100%|##########| 20/20 [09:43<00:00, 30.29s/it][I 2023-07-01 00:59:58,209] Trial 62 finished with value: 0.8902981172435277 and parameters: {'lambda_l1': 0.16422158193277708, 'lambda_l2': 3.875295933871404}. Best is trial 60 with value: 0.8902981172435277.
regularization_factors, val_score: 0.890298: 100%|##########| 20/20 [09:43<00:00, 29.18s/it]


[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.890298


min_data_in_leaf, val_score: 0.890298:   0%|          | 0/5 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.298447 seconds, init for row-wise cost 1.415457 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [De

min_data_in_leaf, val_score: 0.890889:  20%|##        | 1/5 [00:30<02:00, 30.16s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.890889


[I 2023-07-01 01:00:28,512] Trial 63 finished with value: 0.8908887747013381 and parameters: {'min_child_samples': 25}. Best is trial 63 with value: 0.8908887747013381.
min_data_in_leaf, val_score: 0.890889:  20%|##        | 1/5 [00:30<02:00, 30.16s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.331158 seconds, init for row-wise cost 1.272321 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [De

min_data_in_leaf, val_score: 0.890889:  40%|####      | 2/5 [01:01<01:31, 30.63s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 16
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.890889


[I 2023-07-01 01:00:59,471] Trial 64 finished with value: 0.8908887747013381 and parameters: {'min_child_samples': 10}. Best is trial 63 with value: 0.8908887747013381.
min_data_in_leaf, val_score: 0.890889:  40%|####      | 2/5 [01:01<01:31, 30.63s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.299890 seconds, init for row-wise cost 1.402217 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 14
[LightGBM] [De

min_data_in_leaf, val_score: 0.890889:  60%|######    | 3/5 [01:33<01:02, 31.26s/it][I 2023-07-01 01:01:31,402] Trial 65 finished with value: 0.8908887747013381 and parameters: {'min_child_samples': 5}. Best is trial 63 with value: 0.8908887747013381.
min_data_in_leaf, val_score: 0.890889:  60%|######    | 3/5 [01:33<01:02, 31.26s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.890889
[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 1.211118 seconds, init for row-wise cost 1.739011 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 11
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[Lig

min_data_in_leaf, val_score: 0.891102:  80%|########  | 4/5 [36:56<14:17, 857.11s/it]

[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.891102


[I 2023-07-01 01:36:54,631] Trial 66 finished with value: 0.8911021565714513 and parameters: {'min_child_samples': 50}. Best is trial 66 with value: 0.8911021565714513.
min_data_in_leaf, val_score: 0.891102:  80%|########  | 4/5 [36:56<14:17, 857.11s/it]

[LightGBM] [Info] Number of positive: 9586, number of negative: 140414
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.880361
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.515813
[LightGBM] [Debug] init for col-wise cost 0.451700 seconds, init for row-wise cost 1.470046 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123224
[LightGBM] [Info] Number of data points in the train set: 150000, number of used features: 784
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.063907 -> initscore=-2.684292
[LightGBM] [Info] Start training from score -2.684292
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 13
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 12
[LightGBM] [De

min_data_in_leaf, val_score: 0.891102: 100%|##########| 5/5 [37:40<00:00, 563.94s/it][I 2023-07-01 01:37:38,715] Trial 67 finished with value: 0.8911021565714513 and parameters: {'min_child_samples': 100}. Best is trial 66 with value: 0.8911021565714513.


[LightGBM] [Debug] Trained a tree with leaves = 115 and depth = 15
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.891102


min_data_in_leaf, val_score: 0.891102: 100%|##########| 5/5 [37:40<00:00, 452.10s/it]


In [23]:
Metrics.roc_auc(y_test, predicted)

0.8911021565714514

In [33]:
fff.predict(x_test)

array([0.0237755 , 0.01348663, 0.01358502, ..., 0.0397863 , 0.04060678,
       0.59270895])

In [32]:
Metrics.roc_auc(y_test, predicted)

0.6224238253867485

In [24]:
existing = ['target', 'ind']
roc_auc = -1
prev_roc_auc = -1
increase_rate = -1

for c in list(df_combine.columns):
    if c != 'target':
        
        # add column
        existing.append(c)
        
        # new dataset
        df = df_combine[existing]
        
        # build model
        test, train = split_test_train(df)
        y_train = train['target']
        y_test = test['target']
        train = train.drop(['target'], axis=1)
        test = test.drop(['target'], axis=1)
        regressor = lg.LGBMClassifier()
        regressor.fit(train, y_train)
        
        # calculate metric
        predicted = regressor.predict(test)
        
        try:
            roc_auc = Metrics.roc_auc(y_test, predicted)
        except ValueError:
            roc_auc = -1
        
        print(f'{len(existing)} -> {roc_auc}')
        
        # remove current column
        # existing 

3 -> 0.5
4 -> 0.5
5 -> 0.5000247244095933
6 -> 0.49999288747270565
7 -> 0.49999288747270565
8 -> 0.562710535166727
9 -> 0.562710535166727
10 -> 0.5629195075422255
11 -> 0.5748062123180953
12 -> 0.575639392736126
13 -> 0.575575549426414
14 -> 0.5756463358274835
15 -> 0.5754974810621133
16 -> 0.5757315167190786
17 -> 0.5737078370499589
18 -> 0.5737078370499589
19 -> 0.5744663359313992
20 -> 0.5789362128999035
21 -> 0.5808645511942971
22 -> 0.5804355987804852
23 -> 0.5770969506326258
24 -> 0.5760161872467884
25 -> 0.5779761930421329
26 -> 0.5780649301973749
27 -> 0.5765264254166746
28 -> 0.5765264254166746
29 -> 0.5782031161718039
30 -> 0.5787951484055748
31 -> 0.5786925250676175
32 -> 0.5782246231896236
33 -> 0.5788908286521744
34 -> 0.5775121862996596
35 -> 0.5775121862996596
36 -> 0.5768918733926481
37 -> 0.5787846490505701
38 -> 0.5816553953204356
39 -> 0.581336178771875
40 -> 0.5796738825072713
41 -> 0.5792945483485827
42 -> 0.5801063911847305
43 -> 0.5796456018340308
44 -> 0.5823569

KeyboardInterrupt: 

### Check if dataset is balanced

In [None]:
churned = len(df_combine[df_combine['target'] == 1])
not_churned = len(df_combine[df_combine['target'] == 0])

In [None]:
y = [churned, not_churned]
x = ['Churned', 'Not churned']
plt.bar(x,y)
plt.show()

## Feature engineering
#### Check inbound calls from non-vodaphone number
#### Check outbound calls to non-vodaphone number
#### SMS from non-vodaphone number
#### SMS to non-vodaphone number

## Correlation matrix

## Explore numbers abonent had communication with + frequency

In [None]:
churned = df_combine[df_combine['target'] == 1]
churned_with_nums = pd.merge(churned, df_combine_num, on='abon_id', how='left')
churned_numbers = list(churned_with_nums['bnum'].unique())

non_churned = df_combine[df_combine['target'] == 0]
non_churned_with_nums = pd.merge(non_churned, df_combine_num, on='abon_id', how='left')
non_churned_numbers = list(non_churned_with_nums['bnum'].unique())

number_abon_had_communicated = (set(churned_numbers) - set(non_churned_numbers))
df_number_abon_had_communicated = pd.DataFrame(number_abon_had_communicated, columns= ['bnum'])

In [None]:
churned_with_nums[churned_with_nums['bnum'].isin(list(number_abon_had_communicated))]

## Telephone Feature

## Groupping

In [None]:
gr = train_num.groupby(['abon_id'])
gr.groups

In [None]:
# cor_matrix = train_df.corr()