# Kaggle Playground - Season 4 Episode 
## Binary Classification of Insurance Cross Selling

Competion link - https://www.kaggle.com/competitions/playground-series-s4e8

### Steps
- Import the necessary libraries, packages and modules
- Read the datsets as data framers

### Understand the problem

- class is the target variable
- It determines the class of a mushroom depending on the given variables

In [1]:
# Import the necessary libraries, packages and modules

import warnings
warnings.filterwarnings('ignore')

# import dtale    # Use of a web progrm to analysis the data deeply
import lightgbm as lgb
import logging
import matplotlib.pyplot as plt
import numpy as np
import optuna
import os
import pandas as pd
import pickle
import seaborn as sns
# import statsmodels.api as sm
import tensorflow as tf
import time
import xgboost as xgb
# import zipfile

from imblearn.over_sampling import RandomOverSampler
from optuna.samplers import TPESampler
#from pandas_profiling import ProfileReport
from scipy.stats import randint, uniform
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from lightgbm import LGBMClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import (BaggingClassifier, RandomForestClassifier, AdaBoostClassifier,
                              GradientBoostingClassifier, HistGradientBoostingClassifier)
from sklearn.feature_selection import chi2, RFE, SelectKBest, SelectFromModel  
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, make_scorer, matthews_corrcoef, roc_auc_score
from sklearn.model_selection import cross_val_score, train_test_split, RandomizedSearchCV, StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
# from skopt import BayesSearchCV
from xgboost import XGBClassifier

sns.set()
%matplotlib inline

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
start_time = time.time()

In [2]:
# Test to see if TensorFlow can utilize the GPU
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [3]:
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

logger.info(f"Train data load completed. Time elapsed: {time.time() - start_time:.2f} seconds")

train_df.head(2)

INFO:__main__:Train data load completed. Time elapsed: 8.93 seconds


Unnamed: 0,id,class,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,...,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
0,0,e,8.8,f,s,u,f,a,c,w,...,,,w,,,f,f,,d,a
1,1,p,4.51,x,h,o,f,a,c,n,...,,y,o,,,t,z,,d,w


In [4]:
test_df.head(2)

Unnamed: 0,id,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,...,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
0,3116945,8.64,x,,n,t,,,w,11.13,...,b,,w,u,w,t,g,,d,a
1,3116946,6.9,o,t,o,f,,c,y,1.27,...,,,n,,,f,f,,d,a


### Identify the target variable and features

- class is the target variable
- It determines the class of a mushroom depending on the given variables

### Remove duplicate rows

- Checked the sum of duplicated rows in train and test datasets
- No dupllicated rows in train dataframe

### Handling missing values

- Checked the missing values in column
- There are considerable amount of missing values in many columns

In [5]:
train_df.shape

(3116945, 22)

In [6]:
test_df.shape

(2077964, 21)

### Data Preprocessing
- Droping columns with more than 50% missing values
- Using simple imputer
- Encoding Categorical Variables

In [7]:
# Drop columns with extremely high missing values
columns_to_drop = ['id', 'veil-type', 'spore-print-color', 'stem-root', 'veil-color', 'stem-surface']
train_df.drop(columns=columns_to_drop, inplace=True)
test_df.drop(columns=columns_to_drop, inplace=True)

In [8]:
# Spliting dependent and independent valriable

y_train = train_df['class']
train_df = train_df.drop('class', axis = 1)

train_df.head(2)

Unnamed: 0,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,stem-width,stem-color,has-ring,ring-type,habitat,season
0,8.8,f,s,u,f,a,c,w,4.51,15.39,w,f,f,d,a
1,4.51,x,h,o,f,a,c,n,4.79,6.48,o,t,z,d,w


In [9]:
# Identify numerical and categorical columns
numerical_cols = train_df.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = train_df.select_dtypes(include=['object']).columns

for col in categorical_cols:
    print("Unique Value is each categorical column :")
    print(col, train_df[col].nunique())

Unique Value is each categorical column :
cap-shape 74
Unique Value is each categorical column :
cap-surface 83
Unique Value is each categorical column :
cap-color 78
Unique Value is each categorical column :
does-bruise-or-bleed 26
Unique Value is each categorical column :
gill-attachment 78
Unique Value is each categorical column :
gill-spacing 48
Unique Value is each categorical column :
gill-color 63
Unique Value is each categorical column :
stem-color 59
Unique Value is each categorical column :
has-ring 23
Unique Value is each categorical column :
ring-type 40
Unique Value is each categorical column :
habitat 52
Unique Value is each categorical column :
season 4


In [10]:
le = LabelEncoder()

# Define the imputer for numerical columns (median imputation)
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Define a function to apply LabelEncoder to each categorical column
def encode_labels(df, columns):
    df_encoded = df.copy()
    le_dict = {}
    for col in columns:
        # le = LabelEncoder()
        df_encoded[col] = le.fit_transform(df_encoded[col])
        le_dict[col] = le
    return df_encoded, le_dict

# Define the imputer and encoder for categorical columns using LabelEncoder
def encode_labels_pipeline(X):
    df = pd.DataFrame(X, columns=categorical_cols)
    df_encoded, le_dict = encode_labels(df, categorical_cols)
    return df_encoded.values

# Create a ColumnTransformer for numerical and categorical transformations
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', FunctionTransformer(func=lambda X: encode_labels_pipeline(X), validate=False), categorical_cols)
    ],
    remainder='passthrough'
)

# Fit the preprocessor on the training data
preprocessor.fit(train_df)

# Transform both the training and test data
train_df_transformed = preprocessor.transform(train_df)
test_df_transformed = preprocessor.transform(test_df)

print("Transformed Training Data Shape:", train_df_transformed.shape)
print("Transformed Test Data Shape:", test_df_transformed.shape)

logger.info(f"Missing values and categorical columns treatment completed. Time elapsed: {time.time() - start_time:.2f} seconds")

INFO:__main__:Missing values and categorical columns treatment completed. Time elapsed: 34.21 seconds


Transformed Training Data Shape: (3116945, 15)
Transformed Test Data Shape: (2077964, 15)


In [11]:
label_encoder = LabelEncoder()

y_train = label_encoder.fit_transform(y_train)

y_train

array([0, 1, 0, ..., 1, 0, 1])

In [12]:
# Combine feature names
all_feature_names = numerical_cols.tolist() + categorical_cols.tolist()

# Convert to DataFrame
train_df_transformed = pd.DataFrame(train_df_transformed, columns=all_feature_names)
test_df_transformed = pd.DataFrame(test_df_transformed, columns=all_feature_names)

In [13]:
def optimize_memory_usage(df):
    start_mem_usage = df.memory_usage().sum() / 1024 ** 2
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type.name in ['category', 'object']:
            raise ValueError(f"Column '{col}' is of type '{col_type.name}'")

        c_min = df[col].min()
        c_max = df[col].max()
        
        if str(col_type)[:3] == 'int':
            
            if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                df[col] = df[col].astype(np.int8)
                
            elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                df[col] = df[col].astype(np.int16)
                
            elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                df[col] = df[col].astype(np.int32)
                
            elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                df[col] = df[col].astype(np.int64)
        
        else:
        
            if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                df[col] = df[col].astype(np.float16)
            
            elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                df[col] = df[col].astype(np.float32)
            
            else:
                df[col] = df[col].astype(np.float64)

    end_mem_usage = df.memory_usage().sum() / 1024**2
    print(f'------ Memory usage before: {start_mem_usage:.2f} MB')
    print(f'------ Memory usage after: {end_mem_usage:.2f} MB')
    print(f'------ Reduced memory usage by {(100 * (start_mem_usage - end_mem_usage) / start_mem_usage):.1f}%')
    print('**********************' * 5)

    return df

In [14]:
train_df_transformed = optimize_memory_usage(train_df_transformed)
test_df_transformed = optimize_memory_usage(test_df_transformed)

------ Memory usage before: 356.71 MB
------ Memory usage after: 89.18 MB
------ Reduced memory usage by 75.0%
**************************************************************************************************************
------ Memory usage before: 237.80 MB
------ Memory usage after: 59.45 MB
------ Reduced memory usage by 75.0%
**************************************************************************************************************


In [15]:
train_df_transformed.head(2)

Unnamed: 0,cap-diameter,stem-height,stem-width,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-color,has-ring,ring-type,habitat,season
0,0.534668,-0.681152,0.523438,53.0,72.0,72.0,8.0,44.0,28.0,59.0,55.0,5.0,18.0,25.0,0.0
1,-0.386475,-0.577148,-0.577148,71.0,56.0,64.0,8.0,44.0,28.0,46.0,47.0,18.0,39.0,25.0,3.0


In [16]:
test_df_transformed.head(2)

Unnamed: 0,cap-diameter,stem-height,stem-width,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-color,has-ring,ring-type,habitat,season
0,0.500488,1.771484,0.736816,59.0,59.0,44.0,18.0,66.0,35.0,52.0,51.0,17.0,15.0,16.0,0.0
1,0.126709,-1.880859,-0.049866,50.0,53.0,45.0,5.0,66.0,17.0,54.0,38.0,6.0,14.0,16.0,0.0


In [17]:
train_df_transformed['class'] = y_train
train_df_transformed.head(2)

Unnamed: 0,cap-diameter,stem-height,stem-width,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-color,has-ring,ring-type,habitat,season,class
0,0.534668,-0.681152,0.523438,53.0,72.0,72.0,8.0,44.0,28.0,59.0,55.0,5.0,18.0,25.0,0.0,0
1,-0.386475,-0.577148,-0.577148,71.0,56.0,64.0,8.0,44.0,28.0,46.0,47.0,18.0,39.0,25.0,3.0,1


In [18]:
# Since we have only one data set, spliting it into train and test (validation)

train_df_final, validation_df = train_test_split(train_df_transformed, train_size = 0.75, random_state = 42, stratify = train_df_transformed['class'])
logger.info(f"Train test split completed. Time elapsed: {time.time() - start_time:.2f} seconds")
train_df_final.head(2)

INFO:__main__:Train test split completed. Time elapsed: 37.99 seconds


Unnamed: 0,cap-diameter,stem-height,stem-width,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-color,has-ring,ring-type,habitat,season,class
2419543,0.169678,-0.025314,0.197144,67.0,72.0,64.0,8.0,47.0,31.0,27.0,55.0,5.0,18.0,29.0,3.0,0
910367,0.171753,0.108032,-0.09436,53.0,83.0,76.0,8.0,75.0,28.0,46.0,55.0,5.0,18.0,25.0,0.0,1


In [19]:
# Feature Selection
def feature_selection(X_train, y_train, model):
    if hasattr(model, 'coef_') or hasattr(model, 'feature_importances_'):
        selector = SelectFromModel(model, threshold='mean')
        selector.fit(X_train, y_train)
        return selector.transform(X_train), selector.get_support()
    else:
        raise ValueError("Feature selection not supported for this model.")

def alternative_feature_selection(X_train, y_train):
    selector = SelectKBest(score_func=f_classif, k='all')
    X_train_selected = selector.fit_transform(X_train, y_train)
    return X_train_selected, selector.get_support()

# Initialize models
models = {
    'Logistic Regression': LogisticRegression(),
    'Ridge Classifier': RidgeClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Bagging Classifier': BaggingClassifier(),
    'Random Forest': RandomForestClassifier(),
    'AdaBoost Classifier': AdaBoostClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'SVC': SVC(),
#    'KNN': KNeighborsClassifier(),
    'XGBoost': xgb.XGBClassifier(tree_method='gpu_hist'),
    'LightGBM': lgb.LGBMClassifier(device='gpu')
}

# Define parameter grids for RandomizedSearchCV
param_grids = {
    'Logistic Regression': {
        'solver': ['newton-cg', 'lbfgs', 'liblinear', 'saga'],
        'penalty': ['l2', 'none'],  # 'elasticnet' is not supported by 'lbfgs'
        'C': uniform(0.001, 1000),  # Inverse of regularization strength
        'max_iter': [100, 200, 300]
    },
    'Ridge Classifier': {
        'alpha': uniform(0.1, 10),  # Regularization strength
        'solver': ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg'],
        'fit_intercept': [True, False],
        'max_iter': [100, 200, 300],
       # 'normalize': [True, False]  # This should be removed as it's not a valid parameter
    },
    'Decision Tree': {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': randint(3, 20),
    'min_samples_split': uniform(0.01, 0.1),  # Random float between 0.01 and 0.1
    'min_samples_leaf': uniform(0.01, 0.1),  # Random float between 0.01 and 0.1
    'min_weight_fraction_leaf': uniform(0.0, 0.1),  # Random float between 0.0 and 0.1
    'max_features': ['auto', 'sqrt', 'log2', None],
    'max_leaf_nodes': randint(10, 50),
    'min_impurity_decrease': uniform(0.0, 0.1),
    'class_weight': [None, 'balanced']
},
    'Bagging Classifier': {
    'base_estimator': [DecisionTreeClassifier(), None],  # Default is DecisionTreeClassifier
    'n_estimators': randint(10, 100),
    'max_samples': uniform(0.5, 0.5),  # Random float between 0.5 and 1.0
    'max_features': uniform(0.5, 0.5),  # Random float between 0.5 and 1.0
    'bootstrap': [True, False],
    'bootstrap_features': [True, False],
    'oob_score': [True, False],
    'n_jobs': [None, -1],
    'random_state': [42]  # Set to None for random results, or a fixed integer for reproducibility
},
    'Random Forest': {
    'n_estimators': randint(50, 300),
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, randint(3, 20)],
    'min_samples_split': randint(2, 20),
    'min_samples_leaf': randint(1, 20),
    'max_features': ['auto', 'sqrt', 'log2', uniform(0.5, 0.5)],
    'bootstrap': [True, False],
    'oob_score': [True, False],
    'n_jobs': [None, -1],
    'random_state': [42],
    'verbose': [0, 1],
    'warm_start': [True, False],
    'class_weight': [None, 'balanced']
},
    'AdaBoost Classifier': {
    'base_estimator': [None, DecisionTreeClassifier(max_depth=1)],
    'n_estimators': randint(50, 300),
    'learning_rate': uniform(0.01, 1.0),
    'algorithm': ['SAMME', 'SAMME.R'],
    'random_state': [42]
},
    'Gradient Boosting': {
    'n_estimators': randint(50, 300),
    'learning_rate': uniform(0.01, 0.5),
    'max_depth': randint(3, 10),
    'min_samples_split': randint(2, 20),
    'min_samples_leaf': randint(1, 20),
    'max_features': ['sqrt', 'log2', None, uniform(0.1, 0.9)],
    'subsample': uniform(0.5, 0.5),
    'criterion': ['friedman_mse', 'squared_error', 'mae'],
    #'alpha': uniform(0.5, 0.5),
    'random_state': [42],
    'verbose': [0, 1]
},
    'SVC': {
    'C': uniform(0.1, 10),
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree': randint(2, 5),
    'gamma': ['scale', 'auto', uniform(0.001, 1)],
    'coef0': uniform(0, 10),
    'shrinking': [True, False],
    'probability': [True, False],
    'tol': uniform(1e-5, 1e-1),
    'cache_size': uniform(50, 500),
    'class_weight': [None, 'balanced'],
    'verbose': [True, False],
    'max_iter': [-1, 100, 200],
    'decision_function_shape': ['ovr', 'ovo'],
    'break_ties': [True, False],
    'random_state': [42]
},
    'KNN': {
    'n_neighbors': randint(1, 30),
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'p': randint(1, 3),
    'metric': ['minkowski', 'euclidean', 'manhattan', 'chebyshev', 'hamming'],
    'leaf_size': randint(10, 50),
    'n_jobs': [None, -1]
},
    'XGBoost': {
    'n_estimators': randint(100, 300),
    'max_depth': randint(3, 10),
    'learning_rate': uniform(0.01, 0.3),
    'subsample': uniform(0.5, 0.5),
    'colsample_bytree': uniform(0.5, 0.5),
    'gamma': uniform(0, 0.5),
    'min_child_weight': randint(1, 10),
    'reg_alpha': uniform(0, 1),
    'reg_lambda': uniform(0, 1),
    'scale_pos_weight': uniform(1, 10),
    'max_delta_step': randint(0, 10),
    'colsample_bylevel': uniform(0.5, 0.5),
    'colsample_bynode': uniform(0.5, 0.5)
},
    'LightGBM': {
    'num_leaves': randint(20, 100),
    'max_depth': randint(3, 15),
    'learning_rate': uniform(0.01, 0.3),
    'n_estimators': randint(100, 300),
    'min_child_samples': randint(10, 100),
    'min_split_gain': uniform(0, 0.5),
    'subsample': uniform(0.5, 0.5),
    'subsample_freq': randint(1, 10),
    'colsample_bytree': uniform(0.5, 0.5),
    'colsample_bylevel': uniform(0.5, 0.5),
    'reg_alpha': uniform(0, 1),
    'reg_lambda': uniform(0, 1),
    'scale_pos_weight': uniform(1, 10)
}
}

# Optuna objective function for XGBoost and LightGBM
def objective(trial, model_name, model, X_train, y_train, X_val, y_val):
    if model_name == 'XGBoost':
        params = {
            'n_estimators': trial.suggest_int('n_estimators', 100, 300),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
            'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
            'gamma': trial.suggest_uniform('gamma', 0, 0.5),
            'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
            'reg_alpha': trial.suggest_uniform('reg_alpha', 0, 1),
            'reg_lambda': trial.suggest_uniform('reg_lambda', 0, 1),
            'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1, 10),
            'max_delta_step': trial.suggest_int('max_delta_step', 0, 10),
            'colsample_bylevel': trial.suggest_uniform('colsample_bylevel', 0.5, 1.0),
            'colsample_bynode': trial.suggest_uniform('colsample_bynode', 0.5, 1.0)
        }
    elif model_name == 'LightGBM':
        params = {
            'n_estimators': trial.suggest_int('n_estimators', 100, 300),
            'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
            'num_leaves': trial.suggest_int('num_leaves', 31, 127),
            'max_depth': trial.suggest_int('max_depth', -1, 15),
            'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
            'reg_alpha': trial.suggest_uniform('reg_alpha', 0, 0.2),
            'reg_lambda': trial.suggest_uniform('reg_lambda', 0, 0.2),
            'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),
            'min_split_gain': trial.suggest_uniform('min_split_gain', 0, 0.5),
            'subsample_freq': trial.suggest_int('subsample_freq', 1, 10),
            'colsample_bylevel': trial.suggest_uniform('colsample_bylevel', 0.5, 1.0),
            'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1, 10)
        }

    model.set_params(**params)
    model.fit(X_train, y_train)
    preds = model.predict(X_val)
    score = matthews_corrcoef(y_val, preds)
    return score

# Hyperparameter tuning using RandomizedSearchCV
def tune_model(model_name, model, param_grid, X_train, y_train):
    search = RandomizedSearchCV(
        model, param_distributions=param_grid, n_iter=10, scoring='f1', cv=3, random_state=42
    )
    search.fit(X_train, y_train)
    print(f"{model_name} Best Parameters (Random Search): {search.best_params_}")  # Print best parameters
    return search.best_estimator_, search.best_params_

# Hyperparameter tuning using Optuna for XGBoost and LightGBM
def optuna_tune_model(model_name, model, X_train, y_train, X_val, y_val):
    study = optuna.create_study(direction='maximize', sampler=TPESampler())
    study.optimize(lambda trial: objective(trial, model_name, model, X_train, y_train, X_val, y_val), n_trials=50)
    print(f"{model_name} Best Parameters (Optuna): {study.best_params}")  # Print best parameters
    return study.best_params

# Train and evaluate models
def evaluate_models(X_train, y_train, X_val, y_val):
    best_models = {}
    for name, model in models.items():
        logger.info(f"Evaluating {name}")

        # Feature Selection
        if hasattr(model, 'coef_') or hasattr(model, 'feature_importances_'):
            logger.info(f"Performing feature selection for {name}")
            X_train_selected, support = feature_selection(X_train, y_train, model)
            X_val_selected = X_val.iloc[:, support]
        else:
            logger.info(f"Skipping feature selection for {name} due to lack of support")
            X_train_selected = X_train
            X_val_selected = X_val

        if name in ['XGBoost', 'LightGBM']:
            # Perform Random Search
            best_model, best_params_random = tune_model(name, model, param_grids[name], X_train_selected, y_train)
            train_preds_random = best_model.predict(X_train_selected)
            val_preds_random = best_model.predict(X_val_selected)
            train_score_random = matthews_corrcoef(y_train, train_preds_random)
            val_score_random = matthews_corrcoef(y_val, val_preds_random)
            logger.info(f"{name} (Random Search) - Train MCC Score: {train_score_random:.4f}")
            logger.info(f"{name} (Random Search) - Validation MCC Score: {val_score_random:.4f}")
            
            # Perform Bayesian Search (Optuna)
            params_optuna = optuna_tune_model(name, model, X_train_selected, y_train, X_val_selected, y_val)
            model.set_params(**params_optuna)
            model.fit(X_train_selected, y_train)
            train_preds_optuna = model.predict(X_train_selected)
            val_preds_optuna = model.predict(X_val_selected)
            train_score_optuna = matthews_corrcoef(y_train, train_preds_optuna)
            val_score_optuna = matthews_corrcoef(y_val, val_preds_optuna)
            logger.info(f"{name} (Optuna) - Train MCC Score: {train_score_optuna:.4f}")
            logger.info(f"{name} (Optuna) - Validation MCC Score: {val_score_optuna:.4f}")
            logger.info(f"Time elapsed: {time.time() - start_time:.2f} seconds")
            
        else:
            # Hyperparameter tuning using RandomizedSearchCV
            best_model, best_params = tune_model(name, model, param_grids[name], X_train_selected, y_train)
            model = best_model
            train_preds = model.predict(X_train_selected)
            val_preds = model.predict(X_val_selected)
            train_score = matthews_corrcoef(y_train, train_preds)
            val_score = matthews_corrcoef(y_val, val_preds)
            logger.info(f"{name} - Train MCC Score: {train_score:.4f}")
            logger.info(f"{name} - Validation MCC Score: {val_score:.4f}")
            logger.info(f"Time elapsed: {time.time() - start_time:.2f} seconds")

        best_models[name] = model

    return best_models

# Testing best models on the test set
def test_best_models(best_models, X_train, y_train, X_val, y_val):
    results = {}
    for name, model in best_models.items():
        # Training predictions and score
        train_preds = model.predict(X_train)
        train_score = matthews_corrcoef(y_train, train_preds)

        # Validation predictions and score
        val_preds = model.predict(X_val)
        val_score = matthews_corrcoef(y_val, val_preds)

        # Logging the scores
        logger.info(f"{name} - Train MCC Score: {train_score:.4f}")
        logger.info(f"{name} - Validation MCC Score: {val_score:.4f}")
        logger.info(f"Time elapsed: {time.time() - start_time:.2f} seconds")

        # Storing the results
        results[name] = {
            'train_score': train_score,
            'val_score': val_score,
            #'test_score': test_score
        }
    return results

In [20]:
y_train_final = train_df_final['class']
train_df_final = train_df_final.drop('class', axis = 1)

y_val = validation_df['class']
validation_df = validation_df.drop('class', axis = 1)

In [31]:
# Evaluate models
best_models = evaluate_models(train_df_final, y_train_final, validation_df, y_val)

logger.info(f"Model evaluation completed. Time elapsed: {time.time() - start_time:.2f} seconds")

INFO:__main__:Evaluating Logistic Regression
INFO:__main__:Skipping feature selection for Logistic Regression due to lack of support


Logistic Regression Best Parameters (Random Search): {'C': 155.99552033620265, 'max_iter': 300, 'penalty': 'l2', 'solver': 'saga'}


INFO:__main__:Logistic Regression - Train MCC Score: 0.2325
INFO:__main__:Logistic Regression - Validation MCC Score: 0.2336
INFO:__main__:Time elapsed: 2023.91 seconds
INFO:__main__:Evaluating Ridge Classifier
INFO:__main__:Skipping feature selection for Ridge Classifier due to lack of support


Ridge Classifier Best Parameters (Random Search): {'alpha': 1.9182496720710063, 'fit_intercept': True, 'max_iter': 100, 'solver': 'lsqr'}


INFO:__main__:Ridge Classifier - Train MCC Score: 0.2340
INFO:__main__:Ridge Classifier - Validation MCC Score: 0.2350
INFO:__main__:Time elapsed: 2080.16 seconds
INFO:__main__:Evaluating Decision Tree
INFO:__main__:Skipping feature selection for Decision Tree due to lack of support


Decision Tree Best Parameters (Random Search): {'class_weight': None, 'criterion': 'entropy', 'max_depth': 17, 'max_features': 'log2', 'max_leaf_nodes': 17, 'min_impurity_decrease': 0.05986584841970366, 'min_samples_leaf': 0.025601864044243652, 'min_samples_split': 0.025599452033620268, 'min_weight_fraction_leaf': 0.005808361216819946, 'splitter': 'random'}


INFO:__main__:Decision Tree - Train MCC Score: 0.0000
INFO:__main__:Decision Tree - Validation MCC Score: 0.0000
INFO:__main__:Time elapsed: 2119.35 seconds
INFO:__main__:Evaluating Bagging Classifier
INFO:__main__:Skipping feature selection for Bagging Classifier due to lack of support


Bagging Classifier Best Parameters (Random Search): {'base_estimator': None, 'bootstrap': False, 'bootstrap_features': False, 'max_features': 0.6039708314340944, 'max_samples': 0.7838501639099957, 'n_estimators': 72, 'n_jobs': -1, 'oob_score': False, 'random_state': 42}


INFO:__main__:Bagging Classifier - Train MCC Score: 0.9990
INFO:__main__:Bagging Classifier - Validation MCC Score: 0.9828
INFO:__main__:Time elapsed: 9401.68 seconds
INFO:__main__:Evaluating Random Forest
INFO:__main__:Skipping feature selection for Random Forest due to lack of support


Random Forest Best Parameters (Random Search): {'bootstrap': True, 'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 171, 'n_jobs': None, 'oob_score': True, 'random_state': 42, 'verbose': 0, 'warm_start': True}


INFO:__main__:Random Forest - Train MCC Score: 0.9837
INFO:__main__:Random Forest - Validation MCC Score: 0.9821
INFO:__main__:Time elapsed: 18675.60 seconds
INFO:__main__:Evaluating AdaBoost Classifier
INFO:__main__:Skipping feature selection for AdaBoost Classifier due to lack of support


AdaBoost Classifier Best Parameters (Random Search): {'algorithm': 'SAMME.R', 'base_estimator': None, 'learning_rate': 0.9799098521619943, 'n_estimators': 285, 'random_state': 42}


INFO:__main__:AdaBoost Classifier - Train MCC Score: 0.6550
INFO:__main__:AdaBoost Classifier - Validation MCC Score: 0.6553
INFO:__main__:Time elapsed: 36164.84 seconds
INFO:__main__:Evaluating Gradient Boosting
INFO:__main__:Skipping feature selection for Gradient Boosting due to lack of support


      Iter       Train Loss      OOB Improve   Remaining Time 
         1           1.2730           0.1036            3.78m
         2           1.1976           0.0758            3.77m
         3           1.1585           0.0400            3.56m
         4           1.1088           0.0488            3.63m
         5           1.0595           0.0513            3.51m
         6           1.0075           0.0476            3.41m
         7           0.9593           0.0527            3.40m
         8           0.9300           0.0261            3.34m
         9           0.8964           0.0303            3.38m
        10           0.8603           0.0399            3.32m
        20           0.5688           0.0161            3.12m
        30           0.4015           0.0051            2.86m
        40           0.3240           0.0087            2.63m
        50           0.2732           0.0105            2.35m
        60           0.2294           0.0038            2.09m
       

INFO:__main__:Gradient Boosting - Train MCC Score: 0.9773
INFO:__main__:Gradient Boosting - Validation MCC Score: 0.9770
INFO:__main__:Time elapsed: 39130.35 seconds
INFO:__main__:Evaluating SVC
INFO:__main__:Skipping feature selection for SVC due to lack of support


[LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM]SVC Best Parameters (Random Search): {'C': 6.159599747810113, 'break_ties': False, 'cache_size': 229.2328642721363, 'class_weight': None, 'coef0': 9.149596755437807, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'sigmoid', 'max_iter': 200, 'probability': True, 'random_state': 42, 'shrinking': False, 'tol': 0.03252833220267471, 'verbose': True}


INFO:__main__:SVC - Train MCC Score: 0.0809
INFO:__main__:SVC - Validation MCC Score: 0.0789
INFO:__main__:Time elapsed: 44650.73 seconds
INFO:__main__:Evaluating XGBoost
INFO:__main__:Skipping feature selection for XGBoost due to lack of support


XGBoost Best Parameters (Random Search): {'colsample_bylevel': 0.6872700594236812, 'colsample_bynode': 0.9753571532049581, 'colsample_bytree': 0.8659969709057025, 'gamma': 0.2993292420985183, 'learning_rate': 0.05680559213273095, 'max_delta_step': 2, 'max_depth': 9, 'min_child_weight': 8, 'n_estimators': 216, 'reg_alpha': 0.6011150117432088, 'reg_lambda': 0.7080725777960455, 'scale_pos_weight': 1.2058449429580245, 'subsample': 0.9849549260809971}


INFO:__main__:XGBoost (Random Search) - Train MCC Score: 0.9818
INFO:__main__:XGBoost (Random Search) - Validation MCC Score: 0.9811
[I 2024-08-12 07:34:19,988] A new study created in memory with name: no-name-2b82df44-d198-4d01-b8ac-c94f4a90c2f8
[I 2024-08-12 07:34:48,064] Trial 0 finished with value: 0.9738103464166992 and parameters: {'n_estimators': 133, 'max_depth': 10, 'learning_rate': 0.2289852640444636, 'subsample': 0.5616728821032592, 'colsample_bytree': 0.7920673136729479, 'gamma': 0.3204910118000997, 'min_child_weight': 9, 'reg_alpha': 0.12613543413879424, 'reg_lambda': 0.4494051896969873, 'scale_pos_weight': 7.886246573097004, 'max_delta_step': 0, 'colsample_bylevel': 0.7342904972804261, 'colsample_bynode': 0.9328659850372987}. Best is trial 0 with value: 0.9738103464166992.
[I 2024-08-12 07:35:28,605] Trial 1 finished with value: 0.9786212989692445 and parameters: {'n_estimators': 208, 'max_depth': 10, 'learning_rate': 0.16012900367018404, 'subsample': 0.9929193416491757, 

XGBoost Best Parameters (Optuna): {'n_estimators': 202, 'max_depth': 9, 'learning_rate': 0.23958788518583604, 'subsample': 0.9484896941246894, 'colsample_bytree': 0.6585770133495129, 'gamma': 0.3990553580730866, 'min_child_weight': 8, 'reg_alpha': 0.08486841624103159, 'reg_lambda': 0.9172395271333036, 'scale_pos_weight': 1.409365709006251, 'max_delta_step': 5, 'colsample_bylevel': 0.7324201991368923, 'colsample_bynode': 0.6984067959652569}


INFO:__main__:XGBoost (Optuna) - Train MCC Score: 0.9840
INFO:__main__:XGBoost (Optuna) - Validation MCC Score: 0.9825
INFO:__main__:Time elapsed: 46565.56 seconds
INFO:__main__:Evaluating LightGBM
INFO:__main__:Skipping feature selection for LightGBM due to lack of support


[LightGBM] [Info] Number of positive: 852698, number of negative: 705774
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 920
[LightGBM] [Info] Number of data points in the train set: 1558472, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (17.84 MB) transferred to GPU in 0.046050 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (17.29 MB) transferred to GPU in 0.045594 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (17.29 MB) tran

INFO:__main__:LightGBM (Random Search) - Train MCC Score: 0.9834
INFO:__main__:LightGBM (Random Search) - Validation MCC Score: 0.9821
[I 2024-08-12 08:17:24,591] A new study created in memory with name: no-name-96bcaa57-53a8-419d-be27-bbd77947ff94


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.072364 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (21.36 MB) transferred to GPU in 0.052233 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (21.34 MB) tr

[I 2024-08-12 08:17:59,426] Trial 0 finished with value: 0.36029137412437184 and parameters: {'n_estimators': 218, 'learning_rate': 0.02357268739510144, 'num_leaves': 62, 'max_depth': 3, 'subsample': 0.7977966992338452, 'colsample_bytree': 0.64487635414753, 'reg_alpha': 0.1115364155651663, 'reg_lambda': 0.18407782400126496, 'min_child_samples': 43, 'min_split_gain': 0.14833494197947034, 'subsample_freq': 7, 'colsample_bylevel': 0.9363775402302872, 'scale_pos_weight': 3.8427461269059897}. Best is trial 0 with value: 0.36029137412437184.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.074174 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (18.70 MB) transferred to GPU in 0.049600 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (18.68 MB) tr

[I 2024-08-12 08:19:05,313] Trial 1 finished with value: 0.9718047944384223 and parameters: {'n_estimators': 274, 'learning_rate': 0.28205638590585824, 'num_leaves': 111, 'max_depth': 11, 'subsample': 0.6983459748283443, 'colsample_bytree': 0.8884690278551692, 'reg_alpha': 0.13445695783808617, 'reg_lambda': 0.10054878848704263, 'min_child_samples': 88, 'min_split_gain': 0.11739036569158601, 'subsample_freq': 7, 'colsample_bylevel': 0.7579517482730446, 'scale_pos_weight': 8.997353619390966}. Best is trial 1 with value: 0.9718047944384223.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.073291 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (24.70 MB) transferred to GPU in 0.062210 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (24.70 MB) tr

[I 2024-08-12 08:19:18,919] Trial 2 finished with value: 0.0901112621419458 and parameters: {'n_estimators': 118, 'learning_rate': 0.06093893601448068, 'num_leaves': 59, 'max_depth': 1, 'subsample': 0.9232597534049081, 'colsample_bytree': 0.7946504030260526, 'reg_alpha': 0.1326185227928662, 'reg_lambda': 0.16630870435952835, 'min_child_samples': 11, 'min_split_gain': 0.13887036870412828, 'subsample_freq': 9, 'colsample_bylevel': 0.7347140722790332, 'scale_pos_weight': 3.3672427550439874}. Best is trial 1 with value: 0.9718047944384223.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.092382 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (23.63 MB) transferred to GPU in 0.069517 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (23.62 MB) tr

[I 2024-08-12 08:20:43,644] Trial 3 finished with value: 0.9746645790675581 and parameters: {'n_estimators': 200, 'learning_rate': 0.029117423093923213, 'num_leaves': 93, 'max_depth': 14, 'subsample': 0.8831690663234764, 'colsample_bytree': 0.6337433371569374, 'reg_alpha': 0.11718652417028136, 'reg_lambda': 0.14885629033936632, 'min_child_samples': 73, 'min_split_gain': 0.09859143852961821, 'subsample_freq': 3, 'colsample_bylevel': 0.8173439167778547, 'scale_pos_weight': 2.99262914868011}. Best is trial 3 with value: 0.9746645790675581.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.074802 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (24.73 MB) transferred to GPU in 0.061844 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (24.72 MB) tr

[I 2024-08-12 08:21:58,587] Trial 4 finished with value: 0.9680263499504378 and parameters: {'n_estimators': 238, 'learning_rate': 0.14190224464191348, 'num_leaves': 45, 'max_depth': 6, 'subsample': 0.923987558968751, 'colsample_bytree': 0.5949642918359175, 'reg_alpha': 0.1167825523593157, 'reg_lambda': 0.09354058336996762, 'min_child_samples': 89, 'min_split_gain': 0.46539473616665555, 'subsample_freq': 2, 'colsample_bylevel': 0.6130128429644977, 'scale_pos_weight': 8.853135121652523}. Best is trial 3 with value: 0.9746645790675581.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.075767 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.38 MB) transferred to GPU in 0.073754 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.38 MB) tr

[I 2024-08-12 08:22:17,692] Trial 5 finished with value: 0.45109983120222924 and parameters: {'n_estimators': 109, 'learning_rate': 0.09806447062384799, 'num_leaves': 115, 'max_depth': 3, 'subsample': 0.9860655649551969, 'colsample_bytree': 0.6506319119436326, 'reg_alpha': 0.14195459760685356, 'reg_lambda': 0.15165714485739937, 'min_child_samples': 81, 'min_split_gain': 0.12209801518131208, 'subsample_freq': 10, 'colsample_bylevel': 0.5434078583004089, 'scale_pos_weight': 8.345085180254838}. Best is trial 3 with value: 0.9746645790675581.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.064528 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (17.91 MB) transferred to GPU in 0.049346 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (17.90 MB) tr

[I 2024-08-12 08:23:03,152] Trial 6 finished with value: 0.9464564613392354 and parameters: {'n_estimators': 202, 'learning_rate': 0.06983351180901402, 'num_leaves': 35, 'max_depth': 6, 'subsample': 0.6689422511013281, 'colsample_bytree': 0.612150211092471, 'reg_alpha': 0.12932566260890174, 'reg_lambda': 0.15096106353719563, 'min_child_samples': 43, 'min_split_gain': 0.009945902931793704, 'subsample_freq': 10, 'colsample_bylevel': 0.782366328659934, 'scale_pos_weight': 9.170903788360036}. Best is trial 3 with value: 0.9746645790675581.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.071740 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (20.02 MB) transferred to GPU in 0.056948 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (20.01 MB) tr

[I 2024-08-12 08:24:03,656] Trial 7 finished with value: 0.9768242678288807 and parameters: {'n_estimators': 217, 'learning_rate': 0.08554437065166058, 'num_leaves': 49, 'max_depth': 10, 'subsample': 0.7477727333807387, 'colsample_bytree': 0.901758159132021, 'reg_alpha': 0.03127581089740803, 'reg_lambda': 0.13453450150190358, 'min_child_samples': 84, 'min_split_gain': 0.1649812061914961, 'subsample_freq': 4, 'colsample_bylevel': 0.7092030970857549, 'scale_pos_weight': 3.143223707853628}. Best is trial 7 with value: 0.9768242678288807.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.072975 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (18.82 MB) transferred to GPU in 0.045725 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (18.81 MB) tr

[I 2024-08-12 08:24:24,204] Trial 8 finished with value: 0.32888085955320284 and parameters: {'n_estimators': 174, 'learning_rate': 0.11039959567319031, 'num_leaves': 122, 'max_depth': 2, 'subsample': 0.7030453915864322, 'colsample_bytree': 0.9053777187241894, 'reg_alpha': 0.12629936297613117, 'reg_lambda': 0.19206950891148789, 'min_child_samples': 19, 'min_split_gain': 0.48747771133986745, 'subsample_freq': 9, 'colsample_bylevel': 0.5890541732903949, 'scale_pos_weight': 8.743129653030344}. Best is trial 7 with value: 0.9768242678288807.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.069398 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (22.33 MB) transferred to GPU in 0.054617 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (22.32 MB) tr

[I 2024-08-12 08:25:04,562] Trial 9 finished with value: 0.9661772948633629 and parameters: {'n_estimators': 102, 'learning_rate': 0.11522984305783522, 'num_leaves': 108, 'max_depth': 11, 'subsample': 0.8343405794562215, 'colsample_bytree': 0.8510560483970063, 'reg_alpha': 0.08069799560414917, 'reg_lambda': 0.15528751424576026, 'min_child_samples': 84, 'min_split_gain': 0.17459716900682165, 'subsample_freq': 3, 'colsample_bylevel': 0.536742560369391, 'scale_pos_weight': 9.358584198358818}. Best is trial 7 with value: 0.9768242678288807.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.063591 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (13.99 MB) transferred to GPU in 0.041211 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (13.97 MB) tr

[I 2024-08-12 08:26:11,275] Trial 10 finished with value: 0.9812294882195829 and parameters: {'n_estimators': 296, 'learning_rate': 0.2035943570308198, 'num_leaves': 76, 'max_depth': 10, 'subsample': 0.5220978786516434, 'colsample_bytree': 0.9858343902308175, 'reg_alpha': 0.01629716638231435, 'reg_lambda': 0.0034234647115942013, 'min_child_samples': 63, 'min_split_gain': 0.3249803217280466, 'subsample_freq': 5, 'colsample_bylevel': 0.9998920951227253, 'scale_pos_weight': 1.2727012537553835}. Best is trial 10 with value: 0.9812294882195829.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.067731 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (13.48 MB) transferred to GPU in 0.042440 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (13.46 MB) tr

[I 2024-08-12 08:27:17,877] Trial 11 finished with value: 0.9813121217631547 and parameters: {'n_estimators': 297, 'learning_rate': 0.21975990772766457, 'num_leaves': 77, 'max_depth': 10, 'subsample': 0.5031096590967538, 'colsample_bytree': 0.9954824568778243, 'reg_alpha': 0.00780926048926231, 'reg_lambda': 0.020320698193575223, 'min_child_samples': 63, 'min_split_gain': 0.3309765233153028, 'subsample_freq': 5, 'colsample_bylevel': 0.9911959600269812, 'scale_pos_weight': 1.100045284744043}. Best is trial 11 with value: 0.9813121217631547.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.065504 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (13.59 MB) transferred to GPU in 0.038309 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (13.57 MB) tr

[I 2024-08-12 08:28:23,020] Trial 12 finished with value: 0.9806418455634396 and parameters: {'n_estimators': 299, 'learning_rate': 0.2177144882286764, 'num_leaves': 81, 'max_depth': 15, 'subsample': 0.5072751044259819, 'colsample_bytree': 0.9994399165151681, 'reg_alpha': 0.004253194047258518, 'reg_lambda': 0.018297940337138448, 'min_child_samples': 60, 'min_split_gain': 0.3443317078743986, 'subsample_freq': 5, 'colsample_bylevel': 0.9895427553638677, 'scale_pos_weight': 1.5350667011071653}. Best is trial 11 with value: 0.9813121217631547.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.063938 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (13.44 MB) transferred to GPU in 0.035459 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (13.43 MB) tr

[I 2024-08-12 08:29:24,525] Trial 13 finished with value: 0.9811426148503017 and parameters: {'n_estimators': 266, 'learning_rate': 0.20809142088922675, 'num_leaves': 76, 'max_depth': 9, 'subsample': 0.5018757140227009, 'colsample_bytree': 0.9986947107660868, 'reg_alpha': 0.1880757183432497, 'reg_lambda': 0.003723517399859339, 'min_child_samples': 60, 'min_split_gain': 0.3410945891268732, 'subsample_freq': 6, 'colsample_bylevel': 0.8942614025515694, 'scale_pos_weight': 1.4622005597638812}. Best is trial 11 with value: 0.9813121217631547.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.078420 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-12 08:30:32,655] Trial 14 finished with value: 0.975879452386798 and parameters: {'n_estimators': 300, 'learning_rate': 0.2061813158554309, 'num_leaves': 89, 'max_depth': 8, 'subsample': 0.5897064951538323, 'colsample_bytree': 0.502722323754344, 'reg_alpha': 0.052102611172300244, 'reg_lambda': 0.03855192566644403, 'min_child_samples': 44, 'min_split_gain': 0.31197199981599555, 'subsample_freq': 1, 'colsample_bylevel': 0.9949395749881313, 'scale_pos_weight': 6.0264921953001265}. Best is trial 11 with value: 0.9813121217631547.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.072560 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (16.73 MB) transferred to GPU in 0.043240 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (16.71 MB) tr

[I 2024-08-12 08:31:28,430] Trial 15 finished with value: 0.9753060843710577 and parameters: {'n_estimators': 261, 'learning_rate': 0.26947598687318186, 'num_leaves': 73, 'max_depth': 13, 'subsample': 0.6246141057315252, 'colsample_bytree': 0.9549392357395489, 'reg_alpha': 0.006185891644862702, 'reg_lambda': 0.05135952483807869, 'min_child_samples': 71, 'min_split_gain': 0.4108680581043229, 'subsample_freq': 5, 'colsample_bylevel': 0.8782873197538503, 'scale_pos_weight': 5.228687130007197}. Best is trial 11 with value: 0.9813121217631547.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.065419 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.62 MB) transferred to GPU in 0.046585 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.60 MB) tr

[I 2024-08-12 08:32:12,791] Trial 16 finished with value: 0.9818984289821313 and parameters: {'n_estimators': 168, 'learning_rate': 0.18081848934184955, 'num_leaves': 98, 'max_depth': 12, 'subsample': 0.5830431132249667, 'colsample_bytree': 0.7611077723671312, 'reg_alpha': 0.049490264021188966, 'reg_lambda': 0.051833568032498556, 'min_child_samples': 100, 'min_split_gain': 0.25102171113843785, 'subsample_freq': 7, 'colsample_bylevel': 0.9311802936770969, 'scale_pos_weight': 1.0274498386079336}. Best is trial 16 with value: 0.9818984289821313.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.079611 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.62 MB) transferred to GPU in 0.045365 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.60 MB) tr

[I 2024-08-12 08:32:52,445] Trial 17 finished with value: 0.9802527193647445 and parameters: {'n_estimators': 146, 'learning_rate': 0.15739045615733346, 'num_leaves': 95, 'max_depth': 13, 'subsample': 0.5830770868882127, 'colsample_bytree': 0.7339191873816482, 'reg_alpha': 0.059937904437846606, 'reg_lambda': 0.06895034922700885, 'min_child_samples': 95, 'min_split_gain': 0.2458139865845385, 'subsample_freq': 7, 'colsample_bylevel': 0.917468977109756, 'scale_pos_weight': 2.2594715963777556}. Best is trial 16 with value: 0.9818984289821313.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.069357 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.19 MB) transferred to GPU in 0.045024 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.17 MB) tr

[I 2024-08-12 08:33:37,418] Trial 18 finished with value: 0.975708335855873 and parameters: {'n_estimators': 174, 'learning_rate': 0.16722297877546172, 'num_leaves': 101, 'max_depth': 7, 'subsample': 0.5669024599649097, 'colsample_bytree': 0.7248871387316623, 'reg_alpha': 0.03910707473610676, 'reg_lambda': 0.034194927376995296, 'min_child_samples': 30, 'min_split_gain': 0.2534053487784818, 'subsample_freq': 8, 'colsample_bylevel': 0.8447001440819544, 'scale_pos_weight': 4.801283771808174}. Best is trial 16 with value: 0.9818984289821313.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.067134 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (17.22 MB) transferred to GPU in 0.048196 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (17.20 MB) tr

[I 2024-08-12 08:34:14,072] Trial 19 finished with value: 0.9723005251509107 and parameters: {'n_estimators': 165, 'learning_rate': 0.24283187412400492, 'num_leaves': 64, 'max_depth': 12, 'subsample': 0.6429198504736056, 'colsample_bytree': 0.7824347546124503, 'reg_alpha': 0.08204716111836413, 'reg_lambda': 0.0767381728968403, 'min_child_samples': 97, 'min_split_gain': 0.24734543258717887, 'subsample_freq': 6, 'colsample_bylevel': 0.9351366061866841, 'scale_pos_weight': 6.991722775781286}. Best is trial 16 with value: 0.9818984289821313.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.064276 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.06 MB) transferred to GPU in 0.043094 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.04 MB) tr

[I 2024-08-12 08:34:51,849] Trial 20 finished with value: 0.9800400817742518 and parameters: {'n_estimators': 136, 'learning_rate': 0.1827117437495142, 'num_leaves': 127, 'max_depth': 15, 'subsample': 0.5620767623441261, 'colsample_bytree': 0.8464812749493583, 'reg_alpha': 0.027790390755633883, 'reg_lambda': 0.11468715760494631, 'min_child_samples': 51, 'min_split_gain': 0.4112070441223314, 'subsample_freq': 8, 'colsample_bylevel': 0.647529739645544, 'scale_pos_weight': 2.140065040496144}. Best is trial 16 with value: 0.9818984289821313.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.097142 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (14.24 MB) transferred to GPU in 0.047152 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (14.22 MB) tr

[I 2024-08-12 08:36:04,998] Trial 21 finished with value: 0.9810862941446161 and parameters: {'n_estimators': 287, 'learning_rate': 0.25322929750194906, 'num_leaves': 88, 'max_depth': 9, 'subsample': 0.5315933383790874, 'colsample_bytree': 0.9517348050443458, 'reg_alpha': 0.020111448087147002, 'reg_lambda': 0.0024282730798322397, 'min_child_samples': 68, 'min_split_gain': 0.3067686179694314, 'subsample_freq': 4, 'colsample_bylevel': 0.9718441020067629, 'scale_pos_weight': 1.0081563137447382}. Best is trial 16 with value: 0.9818984289821313.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.066527 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (14.38 MB) transferred to GPU in 0.039321 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (14.35 MB) tr

[I 2024-08-12 08:36:59,691] Trial 22 finished with value: 0.9802498181152197 and parameters: {'n_estimators': 241, 'learning_rate': 0.22558366820074277, 'num_leaves': 70, 'max_depth': 11, 'subsample': 0.5365645687758802, 'colsample_bytree': 0.9517483412868004, 'reg_alpha': 0.05460624396838353, 'reg_lambda': 0.02637921927348064, 'min_child_samples': 76, 'min_split_gain': 0.37733691695573596, 'subsample_freq': 4, 'colsample_bylevel': 0.9583971008169037, 'scale_pos_weight': 2.213066025741117}. Best is trial 16 with value: 0.9818984289821313.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.079301 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (16.36 MB) transferred to GPU in 0.050567 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (16.34 MB) tr

[I 2024-08-12 08:38:01,494] Trial 23 finished with value: 0.9819129280645352 and parameters: {'n_estimators': 247, 'learning_rate': 0.1889999771851876, 'num_leaves': 82, 'max_depth': 9, 'subsample': 0.6109107789879599, 'colsample_bytree': 0.6942902647199227, 'reg_alpha': 0.001910962076460937, 'reg_lambda': 0.050612026587226244, 'min_child_samples': 65, 'min_split_gain': 0.21371351261604138, 'subsample_freq': 6, 'colsample_bylevel': 0.868113128943391, 'scale_pos_weight': 1.1166645615626603}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.062260 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (16.45 MB) transferred to GPU in 0.040454 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (16.43 MB) tr

[I 2024-08-12 08:38:57,441] Trial 24 finished with value: 0.9765300873620106 and parameters: {'n_estimators': 243, 'learning_rate': 0.17726829662767382, 'num_leaves': 84, 'max_depth': 6, 'subsample': 0.6140899359710709, 'colsample_bytree': 0.6840574019977895, 'reg_alpha': 0.0007120295846845329, 'reg_lambda': 0.055117933973056525, 'min_child_samples': 51, 'min_split_gain': 0.2204539027691565, 'subsample_freq': 6, 'colsample_bylevel': 0.8569521206044367, 'scale_pos_weight': 4.203924075517666}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.070983 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (17.64 MB) transferred to GPU in 0.053001 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (17.62 MB) tr

[I 2024-08-12 08:40:07,393] Trial 25 finished with value: 0.9808017887617546 and parameters: {'n_estimators': 278, 'learning_rate': 0.1382281896837163, 'num_leaves': 96, 'max_depth': -1, 'subsample': 0.6587657149748821, 'colsample_bytree': 0.547019136339274, 'reg_alpha': 0.07496832496523775, 'reg_lambda': 0.05155669596186718, 'min_child_samples': 32, 'min_split_gain': 0.27897642487654173, 'subsample_freq': 8, 'colsample_bylevel': 0.8167186587798866, 'scale_pos_weight': 2.4945535804789585}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.062615 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (16.18 MB) transferred to GPU in 0.042200 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (16.16 MB) tr

[I 2024-08-12 08:41:13,033] Trial 26 finished with value: 0.9806952872421727 and parameters: {'n_estimators': 251, 'learning_rate': 0.2958005853899225, 'num_leaves': 102, 'max_depth': 8, 'subsample': 0.6040879234437369, 'colsample_bytree': 0.7060124968378801, 'reg_alpha': 0.04456094470688949, 'reg_lambda': 0.07688801908818012, 'min_child_samples': 100, 'min_split_gain': 0.19642167042562414, 'subsample_freq': 7, 'colsample_bylevel': 0.9050051972665943, 'scale_pos_weight': 2.0090146144103085}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.069811 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (14.82 MB) transferred to GPU in 0.036277 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (14.80 MB) tr

[I 2024-08-12 08:42:03,862] Trial 27 finished with value: 0.9739520569768255 and parameters: {'n_estimators': 225, 'learning_rate': 0.1909029858159656, 'num_leaves': 84, 'max_depth': 13, 'subsample': 0.5530858302582216, 'colsample_bytree': 0.7734178213387382, 'reg_alpha': 0.02327158750930631, 'reg_lambda': 0.020130111278431076, 'min_child_samples': 64, 'min_split_gain': 0.05204657225472814, 'subsample_freq': 6, 'colsample_bylevel': 0.9427750017124874, 'scale_pos_weight': 6.302547875958693}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.077862 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (19.63 MB) transferred to GPU in 0.064419 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (19.62 MB) tr

[I 2024-08-12 08:42:44,713] Trial 28 finished with value: 0.9798294061652 and parameters: {'n_estimators': 186, 'learning_rate': 0.24380723669305382, 'num_leaves': 53, 'max_depth': 5, 'subsample': 0.7333610676068497, 'colsample_bytree': 0.8240474767178063, 'reg_alpha': 0.06663426255886007, 'reg_lambda': 0.060281260931476094, 'min_child_samples': 77, 'min_split_gain': 0.28211612506017664, 'subsample_freq': 5, 'colsample_bylevel': 0.8707028534568604, 'scale_pos_weight': 1.1002770393157741}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.069674 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (21.21 MB) transferred to GPU in 0.050262 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (21.20 MB) tr

[I 2024-08-12 08:43:45,450] Trial 29 finished with value: 0.9782680369218063 and parameters: {'n_estimators': 219, 'learning_rate': 0.13708139331198627, 'num_leaves': 65, 'max_depth': 9, 'subsample': 0.7924506584869208, 'colsample_bytree': 0.6816378939605866, 'reg_alpha': 0.09783060141367012, 'reg_lambda': 0.04330897496418003, 'min_child_samples': 34, 'min_split_gain': 0.3755860566786666, 'subsample_freq': 7, 'colsample_bylevel': 0.9305891905573584, 'scale_pos_weight': 3.559092791506219}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.067711 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (18.47 MB) transferred to GPU in 0.052721 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (18.45 MB) tr

[I 2024-08-12 08:44:27,400] Trial 30 finished with value: 0.9773498836288271 and parameters: {'n_estimators': 154, 'learning_rate': 0.22914850736477718, 'num_leaves': 69, 'max_depth': 12, 'subsample': 0.6898763515633335, 'colsample_bytree': 0.56038051258074, 'reg_alpha': 0.012637989985528819, 'reg_lambda': 0.09060241457016024, 'min_child_samples': 50, 'min_split_gain': 0.20220320903994068, 'subsample_freq': 3, 'colsample_bylevel': 0.8274513329359436, 'scale_pos_weight': 4.180732561647766}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.067528 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (14.10 MB) transferred to GPU in 0.043466 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (14.07 MB) tr

[I 2024-08-12 08:45:31,509] Trial 31 finished with value: 0.9812169269181649 and parameters: {'n_estimators': 285, 'learning_rate': 0.19043859524830287, 'num_leaves': 77, 'max_depth': 10, 'subsample': 0.5261534185181099, 'colsample_bytree': 0.9293681553732578, 'reg_alpha': 0.017446330415683435, 'reg_lambda': 0.01531628319746818, 'min_child_samples': 59, 'min_split_gain': 0.31946420079779236, 'subsample_freq': 5, 'colsample_bylevel': 0.9643235093307184, 'scale_pos_weight': 1.5715808625256047}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.069350 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.03 MB) transferred to GPU in 0.048334 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.00 MB) tr

[I 2024-08-12 08:46:35,184] Trial 32 finished with value: 0.9793651539090422 and parameters: {'n_estimators': 272, 'learning_rate': 0.1996997445598963, 'num_leaves': 79, 'max_depth': 10, 'subsample': 0.5607890575244316, 'colsample_bytree': 0.9743233454730348, 'reg_alpha': 0.036453007894236625, 'reg_lambda': 0.02843691691489443, 'min_child_samples': 68, 'min_split_gain': 0.22303317367329112, 'subsample_freq': 4, 'colsample_bylevel': 0.9986834649944234, 'scale_pos_weight': 2.6449875434122436}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.063838 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (13.73 MB) transferred to GPU in 0.040385 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (13.71 MB) tr

[I 2024-08-12 08:47:34,800] Trial 33 finished with value: 0.9816034344309629 and parameters: {'n_estimators': 257, 'learning_rate': 0.16693789302255566, 'num_leaves': 102, 'max_depth': 12, 'subsample': 0.5124982679548271, 'colsample_bytree': 0.8693381558174761, 'reg_alpha': 0.015609780426539623, 'reg_lambda': 0.0109499138027895, 'min_child_samples': 55, 'min_split_gain': 0.27942176864707924, 'subsample_freq': 7, 'colsample_bylevel': 0.961039411138627, 'scale_pos_weight': 1.0226797916354917}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.062401 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (16.66 MB) transferred to GPU in 0.044922 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (16.64 MB) tr

[I 2024-08-12 08:48:38,645] Trial 34 finished with value: 0.9812083369138123 and parameters: {'n_estimators': 258, 'learning_rate': 0.1617102619415425, 'num_leaves': 106, 'max_depth': 12, 'subsample': 0.622179348868186, 'colsample_bytree': 0.8247123240292511, 'reg_alpha': 0.0014283737374446298, 'reg_lambda': 0.03966833006355746, 'min_child_samples': 56, 'min_split_gain': 0.28002648236560246, 'subsample_freq': 7, 'colsample_bylevel': 0.905531765720127, 'scale_pos_weight': 1.842222745357208}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.058828 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (13.48 MB) transferred to GPU in 0.040680 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (13.46 MB) tr

[I 2024-08-12 08:49:38,628] Trial 35 finished with value: 0.9794150212772023 and parameters: {'n_estimators': 228, 'learning_rate': 0.1733288874003066, 'num_leaves': 113, 'max_depth': 8, 'subsample': 0.5031559021697276, 'colsample_bytree': 0.868803334984027, 'reg_alpha': 0.0454102690674743, 'reg_lambda': 0.012328626956100708, 'min_child_samples': 39, 'min_split_gain': 0.2632078167119967, 'subsample_freq': 8, 'colsample_bylevel': 0.784649281955143, 'scale_pos_weight': 2.9123253544140435}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.067629 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.48 MB) transferred to GPU in 0.046505 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.45 MB) tr

[I 2024-08-12 08:50:29,252] Trial 36 finished with value: 0.9818541821247039 and parameters: {'n_estimators': 200, 'learning_rate': 0.15009690071124862, 'num_leaves': 100, 'max_depth': 14, 'subsample': 0.5776079356724267, 'colsample_bytree': 0.7516472247430672, 'reg_alpha': 0.15245619768092153, 'reg_lambda': 0.10635587704268287, 'min_child_samples': 48, 'min_split_gain': 0.09346054732116965, 'subsample_freq': 9, 'colsample_bylevel': 0.9635653967590293, 'scale_pos_weight': 1.052294042410672}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.069512 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.73 MB) transferred to GPU in 0.044279 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.71 MB) tr

[I 2024-08-12 08:51:19,648] Trial 37 finished with value: 0.9813587265196959 and parameters: {'n_estimators': 201, 'learning_rate': 0.14323742701125028, 'num_leaves': 100, 'max_depth': 14, 'subsample': 0.5873358487109421, 'colsample_bytree': 0.7549345266266803, 'reg_alpha': 0.1780248318437033, 'reg_lambda': 0.11719556804707584, 'min_child_samples': 26, 'min_split_gain': 0.0902510288184854, 'subsample_freq': 10, 'colsample_bylevel': 0.7032060672366868, 'scale_pos_weight': 1.7278864311783197}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.065071 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (17.37 MB) transferred to GPU in 0.047435 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (17.35 MB) tr

[I 2024-08-12 08:51:55,382] Trial 38 finished with value: 0.9659437181797645 and parameters: {'n_estimators': 126, 'learning_rate': 0.12162687508420023, 'num_leaves': 90, 'max_depth': 14, 'subsample': 0.6486614088199216, 'colsample_bytree': 0.8008112561889263, 'reg_alpha': 0.156764056077345, 'reg_lambda': 0.10449999625838373, 'min_child_samples': 46, 'min_split_gain': 0.12720498889656695, 'subsample_freq': 9, 'colsample_bylevel': 0.9504198652608141, 'scale_pos_weight': 9.854800951117639}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.075459 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (19.24 MB) transferred to GPU in 0.056028 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (19.22 MB) tr

[I 2024-08-12 08:53:04,469] Trial 39 finished with value: 0.9612011021397134 and parameters: {'n_estimators': 192, 'learning_rate': 0.025689498912255887, 'num_leaves': 117, 'max_depth': 12, 'subsample': 0.7185494452833504, 'colsample_bytree': 0.6643117390943906, 'reg_alpha': 0.09839450962993299, 'reg_lambda': 0.0861480381802615, 'min_child_samples': 39, 'min_split_gain': 0.040460263170938926, 'subsample_freq': 9, 'colsample_bylevel': 0.5000040612216183, 'scale_pos_weight': 7.830735304367131}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.059578 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (20.57 MB) transferred to GPU in 0.058073 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (20.55 MB) tr

[I 2024-08-12 08:54:21,121] Trial 40 finished with value: 0.9776058522717597 and parameters: {'n_estimators': 207, 'learning_rate': 0.04521303899484436, 'num_leaves': 108, 'max_depth': 15, 'subsample': 0.7682509072621303, 'colsample_bytree': 0.6282322434296093, 'reg_alpha': 0.14601052113226196, 'reg_lambda': 0.12629774866842503, 'min_child_samples': 10, 'min_split_gain': 0.0866726669069234, 'subsample_freq': 9, 'colsample_bylevel': 0.7893574564192227, 'scale_pos_weight': 3.3936273154303214}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.081165 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.62 MB) transferred to GPU in 0.045820 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.60 MB) tr

[I 2024-08-12 08:55:09,112] Trial 41 finished with value: 0.9812526329199239 and parameters: {'n_estimators': 188, 'learning_rate': 0.1429542998053252, 'num_leaves': 97, 'max_depth': 14, 'subsample': 0.5831640305991512, 'colsample_bytree': 0.7543297442031541, 'reg_alpha': 0.1817424903649488, 'reg_lambda': 0.10643533284810061, 'min_child_samples': 20, 'min_split_gain': 0.09872089582642152, 'subsample_freq': 10, 'colsample_bylevel': 0.7077437327571033, 'scale_pos_weight': 1.6852601903487252}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.079885 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (18.06 MB) transferred to GPU in 0.052636 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (18.05 MB) tr

[I 2024-08-12 08:56:01,741] Trial 42 finished with value: 0.9797776364694626 and parameters: {'n_estimators': 208, 'learning_rate': 0.15135180968109116, 'num_leaves': 101, 'max_depth': 14, 'subsample': 0.6746063004828666, 'colsample_bytree': 0.7550243489415088, 'reg_alpha': 0.17195847130664368, 'reg_lambda': 0.12184367253978144, 'min_child_samples': 23, 'min_split_gain': 0.1561624729038568, 'subsample_freq': 10, 'colsample_bylevel': 0.6820254716608627, 'scale_pos_weight': 2.720081042374967}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.070822 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (14.68 MB) transferred to GPU in 0.041676 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (14.66 MB) tr

[I 2024-08-12 08:56:46,855] Trial 43 finished with value: 0.9810061569216193 and parameters: {'n_estimators': 170, 'learning_rate': 0.12715948505658814, 'num_leaves': 103, 'max_depth': 13, 'subsample': 0.5480297407405822, 'colsample_bytree': 0.8059884674724617, 'reg_alpha': 0.16731219239893053, 'reg_lambda': 0.17433808980859558, 'min_child_samples': 91, 'min_split_gain': 0.06555039752669364, 'subsample_freq': 10, 'colsample_bylevel': 0.7448062217599996, 'scale_pos_weight': 1.7905411093215498}. Best is trial 23 with value: 0.9819129280645352.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.074322 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.95 MB) transferred to GPU in 0.042502 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (15.92 MB) tr

[I 2024-08-12 08:57:54,846] Trial 44 finished with value: 0.9820590501741864 and parameters: {'n_estimators': 233, 'learning_rate': 0.09554089088242068, 'num_leaves': 117, 'max_depth': 11, 'subsample': 0.595271788428719, 'colsample_bytree': 0.7085181455028883, 'reg_alpha': 0.19897165336160308, 'reg_lambda': 0.13427322639962427, 'min_child_samples': 26, 'min_split_gain': 0.18636874352935062, 'subsample_freq': 8, 'colsample_bylevel': 0.6711826583563664, 'scale_pos_weight': 1.0239900845521561}. Best is trial 44 with value: 0.9820590501741864.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.073716 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (16.98 MB) transferred to GPU in 0.046844 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (16.96 MB) tr

[I 2024-08-12 08:59:06,786] Trial 45 finished with value: 0.9818396357841599 and parameters: {'n_estimators': 236, 'learning_rate': 0.07938517429683063, 'num_leaves': 119, 'max_depth': 11, 'subsample': 0.6340891839836539, 'colsample_bytree': 0.7079330705861092, 'reg_alpha': 0.19797558336817184, 'reg_lambda': 0.14063177136992375, 'min_child_samples': 16, 'min_split_gain': 0.18340319873167485, 'subsample_freq': 8, 'colsample_bylevel': 0.6434195674669649, 'scale_pos_weight': 1.009639509213875}. Best is trial 44 with value: 0.9820590501741864.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.077949 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.49 MB) transferred to GPU in 0.071554 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.49 MB) tr

[I 2024-08-12 09:00:25,489] Trial 46 finished with value: 0.98173325115206 and parameters: {'n_estimators': 231, 'learning_rate': 0.08895290219795214, 'num_leaves': 120, 'max_depth': 11, 'subsample': 0.9902483458592899, 'colsample_bytree': 0.7087783582671995, 'reg_alpha': 0.19326649217766848, 'reg_lambda': 0.1423218393296167, 'min_child_samples': 15, 'min_split_gain': 0.1900883826507935, 'subsample_freq': 8, 'colsample_bylevel': 0.6059352170109062, 'scale_pos_weight': 1.414177520026474}. Best is trial 44 with value: 0.9820590501741864.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.075272 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (17.06 MB) transferred to GPU in 0.047857 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (17.04 MB) tr

[I 2024-08-12 09:01:44,585] Trial 47 finished with value: 0.9644642334171429 and parameters: {'n_estimators': 235, 'learning_rate': 0.01058208885322584, 'num_leaves': 127, 'max_depth': 9, 'subsample': 0.63696534865503, 'colsample_bytree': 0.6560905780527154, 'reg_alpha': 0.1994954364471533, 'reg_lambda': 0.16301904424494784, 'min_child_samples': 15, 'min_split_gain': 0.15366288795341007, 'subsample_freq': 8, 'colsample_bylevel': 0.6535803924519088, 'scale_pos_weight': 2.4842464567326092}. Best is trial 44 with value: 0.9820590501741864.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.061986 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (16.19 MB) transferred to GPU in 0.043788 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (16.17 MB) tr

[I 2024-08-12 09:02:53,607] Trial 48 finished with value: 0.9816223052584561 and parameters: {'n_estimators': 214, 'learning_rate': 0.06990599333705164, 'num_leaves': 117, 'max_depth': 11, 'subsample': 0.604558621354862, 'colsample_bytree': 0.6943357206064859, 'reg_alpha': 0.15207032322685926, 'reg_lambda': 0.13873895187563023, 'min_child_samples': 28, 'min_split_gain': 0.00449386529431732, 'subsample_freq': 9, 'colsample_bylevel': 0.6407924425229861, 'scale_pos_weight': 1.3404337660448642}. Best is trial 44 with value: 0.9820590501741864.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 0.071321 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (25.41 MB) transferred to GPU in 0.070347 secs. 1 sparse feature groups
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (25.41 MB) tr

[I 2024-08-12 09:04:07,639] Trial 49 finished with value: 0.9794088587516312 and parameters: {'n_estimators': 219, 'learning_rate': 0.10632027299011061, 'num_leaves': 122, 'max_depth': 10, 'subsample': 0.9498171065810077, 'colsample_bytree': 0.7306044629422778, 'reg_alpha': 0.11170233209607242, 'reg_lambda': 0.12648583942306546, 'min_child_samples': 16, 'min_split_gain': 0.22476166612696544, 'subsample_freq': 7, 'colsample_bylevel': 0.575276266704293, 'scale_pos_weight': 3.126214717718801}. Best is trial 44 with value: 0.9820590501741864.


LightGBM Best Parameters (Optuna): {'n_estimators': 233, 'learning_rate': 0.09554089088242068, 'num_leaves': 117, 'max_depth': 11, 'subsample': 0.595271788428719, 'colsample_bytree': 0.7085181455028883, 'reg_alpha': 0.19897165336160308, 'reg_lambda': 0.13427322639962427, 'min_child_samples': 26, 'min_split_gain': 0.18636874352935062, 'subsample_freq': 8, 'colsample_bylevel': 0.6711826583563664, 'scale_pos_weight': 1.0239900845521561}
[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 927
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 15
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce MX250, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 12 dense feature groups (26.75 MB) transferred to GPU in 

INFO:__main__:LightGBM (Optuna) - Train MCC Score: 0.9829
INFO:__main__:LightGBM (Optuna) - Validation MCC Score: 0.9820
INFO:__main__:Time elapsed: 50610.02 seconds
INFO:__main__:Model evaluation completed. Time elapsed: 50610.04 seconds


In [32]:
best_models

{'Logistic Regression': LogisticRegression(C=155.99552033620265, max_iter=300, solver='saga'),
 'Ridge Classifier': RidgeClassifier(alpha=1.9182496720710063, max_iter=100, solver='lsqr'),
 'Decision Tree': DecisionTreeClassifier(criterion='entropy', max_depth=17, max_features='log2',
                        max_leaf_nodes=17,
                        min_impurity_decrease=0.05986584841970366,
                        min_samples_leaf=0.025601864044243652,
                        min_samples_split=0.025599452033620268,
                        min_weight_fraction_leaf=0.005808361216819946,
                        splitter='random'),
 'Bagging Classifier': BaggingClassifier(base_estimator=None, bootstrap=False,
                   max_features=0.6039708314340944,
                   max_samples=0.7838501639099957, n_estimators=72, n_jobs=-1,
                   random_state=42),
 'Random Forest': RandomForestClassifier(class_weight='balanced', max_features='log2',
                        min_s

In [35]:
test_best_models(best_models, train_df_final, y_train_final, validation_df, y_val)

INFO:__main__:Logistic Regression - Train MCC Score: 0.2325
INFO:__main__:Logistic Regression - Validation MCC Score: 0.2336
INFO:__main__:Time elapsed: 80757.03 seconds
INFO:__main__:Ridge Classifier - Train MCC Score: 0.2340
INFO:__main__:Ridge Classifier - Validation MCC Score: 0.2350
INFO:__main__:Time elapsed: 80758.46 seconds
INFO:__main__:Decision Tree - Train MCC Score: 0.0000
INFO:__main__:Decision Tree - Validation MCC Score: 0.0000
INFO:__main__:Time elapsed: 80759.82 seconds
INFO:__main__:Bagging Classifier - Train MCC Score: 0.9990
INFO:__main__:Bagging Classifier - Validation MCC Score: 0.9828
INFO:__main__:Time elapsed: 80790.83 seconds
INFO:__main__:Random Forest - Train MCC Score: 0.9837
INFO:__main__:Random Forest - Validation MCC Score: 0.9821
INFO:__main__:Time elapsed: 80929.96 seconds
INFO:__main__:AdaBoost Classifier - Train MCC Score: 0.6550
INFO:__main__:AdaBoost Classifier - Validation MCC Score: 0.6553
INFO:__main__:Time elapsed: 81070.38 seconds
INFO:__main_



INFO:__main__:LightGBM - Train MCC Score: 0.9829
INFO:__main__:LightGBM - Validation MCC Score: 0.9820
INFO:__main__:Time elapsed: 81188.59 seconds


{'Logistic Regression': {'train_score': 0.2324614756680745,
  'val_score': 0.23360899287599296},
 'Ridge Classifier': {'train_score': 0.2340048746432381,
  'val_score': 0.23495087086016359},
 'Decision Tree': {'train_score': 0.0, 'val_score': 0.0},
 'Bagging Classifier': {'train_score': 0.9990228627484805,
  'val_score': 0.9828396823991475},
 'Random Forest': {'train_score': 0.9837255237834494,
  'val_score': 0.9820616573648003},
 'AdaBoost Classifier': {'train_score': 0.6549837497066474,
  'val_score': 0.6552920928202168},
 'Gradient Boosting': {'train_score': 0.9773062839941027,
  'val_score': 0.9770009203293313},
 'SVC': {'train_score': 0.08092725567027989, 'val_score': 0.07890100893560482},
 'XGBoost': {'train_score': 0.984003124781014,
  'val_score': 0.9824515407575631},
 'LightGBM': {'train_score': 0.982912095550089,
  'val_score': 0.9819841816648092}}

In [21]:
test_df_transformed.head(2)

Unnamed: 0,cap-diameter,stem-height,stem-width,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-color,has-ring,ring-type,habitat,season
0,0.500488,1.771484,0.736816,59.0,59.0,44.0,18.0,66.0,35.0,52.0,51.0,17.0,15.0,16.0,0.0
1,0.126709,-1.880859,-0.049866,50.0,53.0,45.0,5.0,66.0,17.0,54.0,38.0,6.0,14.0,16.0,0.0


In [22]:
label_encoder = LabelEncoder().fit(y_train)

In [23]:
train_df_transformed.head(2)

Unnamed: 0,cap-diameter,stem-height,stem-width,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-color,has-ring,ring-type,habitat,season,class
0,0.534668,-0.681152,0.523438,53.0,72.0,72.0,8.0,44.0,28.0,59.0,55.0,5.0,18.0,25.0,0.0,0
1,-0.386475,-0.577148,-0.577148,71.0,56.0,64.0,8.0,44.0,28.0,46.0,47.0,18.0,39.0,25.0,3.0,1


In [24]:
y_train

array([0, 1, 0, ..., 1, 0, 1])

In [25]:
train_df_transformed.drop('class', axis=1, inplace=True)
train_df_transformed.head(2)

Unnamed: 0,cap-diameter,stem-height,stem-width,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-color,has-ring,ring-type,habitat,season
0,0.534668,-0.681152,0.523438,53.0,72.0,72.0,8.0,44.0,28.0,59.0,55.0,5.0,18.0,25.0,0.0
1,-0.386475,-0.577148,-0.577148,71.0,56.0,64.0,8.0,44.0,28.0,46.0,47.0,18.0,39.0,25.0,3.0


In [26]:
logger.info("Starting cross-validation process")
logger.info(f"Time elapsed: {time.time() - start_time:.2f} seconds")

# Dictionary of the five best models based on the provided results
five_best_models = {
    'Bagging Classifier': BaggingClassifier(base_estimator=None, bootstrap=False,
                                            max_features=0.6039708314340944,
                                            max_samples=0.7838501639099957, n_estimators=72, n_jobs=-1,
                                            random_state=42),
    'Random Forest': RandomForestClassifier(class_weight='balanced', max_features='log2',
                                            min_samples_leaf=8, min_samples_split=8,
                                            n_estimators=171, oob_score=True, random_state=42,
                                            warm_start=True),
    'XGBoost': XGBClassifier(base_score=None, booster=None, callbacks=None,
                             colsample_bylevel=0.7324201991368923,
                             colsample_bynode=0.6984067959652569,
                             colsample_bytree=0.6585770133495129, device=None,
                             early_stopping_rounds=None, enable_categorical=False,
                             eval_metric=None, feature_types=None, gamma=0.3990553580730866,
                             grow_policy=None, importance_type=None,
                             interaction_constraints=None, learning_rate=0.23958788518583604,
                             max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,
                             max_delta_step=5, max_depth=9, max_leaves=None,
                             min_child_weight=8, monotone_constraints=None,
                             multi_strategy=None, n_estimators=202, n_jobs=None,
                             num_parallel_tree=None, random_state=None),
    'LightGBM': LGBMClassifier(colsample_bytree=0.7085181455028883, device='gpu',
                               learning_rate=0.09554089088242068, max_depth=11,
                               min_child_samples=26, min_split_gain=0.18636874352935062,
                               n_estimators=233, num_leaves=117, reg_alpha=0.19897165336160308,
                               reg_lambda=0.13427322639962427,
                               scale_pos_weight=1.0239900845521561, subsample=0.595271788428719,
                               subsample_freq=8),
    'Gradient Boosting': GradientBoostingClassifier(criterion='squared_error',
                                                    learning_rate=0.1009124836035503, max_depth=7,
                                                    max_features='sqrt', min_samples_leaf=12,
                                                    min_samples_split=13, n_estimators=138,
                                                    random_state=42, subsample=0.645614570099021)
}

INFO:__main__:Starting cross-validation process
INFO:__main__:Time elapsed: 121.60 seconds


In [27]:
# Define the Train_ML function
def Train_ML(Model, X, y, test_data):
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    train_scores = []
    val_scores = []
    test_predictions = []

    for fold, (train_index, val_index) in enumerate(skf.split(X, y), 1):
        # Handle indexing based on the type of X and y
        if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
            X_train, X_val = X.iloc[train_index], X.iloc[val_index]
        else:
            X_train, X_val = X[train_index], X[val_index]
        
        if isinstance(y, pd.Series):
            y_train, y_val = y.iloc[train_index], y.iloc[val_index]
        else:
            y_train, y_val = y[train_index], y[val_index]
        
        Model.fit(X_train, y_train)

        y_train_pred = Model.predict(X_train)
        train_mcc = matthews_corrcoef(y_train, y_train_pred)
        train_scores.append(train_mcc)

        y_val_pred = Model.predict(X_val)
        val_mcc = matthews_corrcoef(y_val, y_val_pred)
        val_scores.append(val_mcc)
        
        y_test_pred_proba = Model.predict(test_data)
        test_predictions.append(y_test_pred_proba)

        print(f"Fold {fold}: Train MCC = {train_mcc:.6f}, Validation MCC = {val_mcc:.6f}")
        logger.info(f"Fold {fold}: Train MCC = {train_mcc:.6f}, Validation MCC = {val_mcc:.6f}")
        logger.info(f"Time elapsed: {time.time() - start_time:.2f} seconds")

    mean_train_mcc = np.mean(train_scores)
    mean_val_mcc = np.mean(val_scores)

    print(f"\nMean Train MCC: {mean_train_mcc:.6f}")
    print(f"Mean Validation MCC: {mean_val_mcc:.6f}")
    logger.info(f"Mean Train MCC: {mean_train_mcc:.6f}, Mean Validation MCC: {mean_val_mcc:.6f}")
    logger.info(f"Time elapsed: {time.time() - start_time:.2f} seconds")

    return Model, test_predictions

In [28]:
y_train

array([0, 1, 0, ..., 1, 0, 1])

In [31]:
test_df_sub = pd.read_csv('test.csv')
test_df_sub.head(2)

Unnamed: 0,id,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,...,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
0,3116945,8.64,x,,n,t,,,w,11.13,...,b,,w,u,w,t,g,,d,a
1,3116946,6.9,o,t,o,f,,c,y,1.27,...,,,n,,,f,f,,d,a


In [32]:
# Perform cross-validation, fit on the entire training data, and predict for each model
for model_name, model in five_best_models.items():
    try:
        logger.info(f"Starting model training and cross-validation for {model_name}")
        trained_model, test_preds = Train_ML(model, train_df_transformed, y_train, test_df_transformed)

        # Averaging predictions across all folds
        final_test_preds = np.mean(test_preds, axis=0)
        final_test_preds_binary = (final_test_preds >= 0.5).astype(int)
        
        # Inverse transform the predictions to get the original class labels
        predictions = label_encoder.inverse_transform(final_test_preds_binary)

        # Log the prediction output
        logger.info(f"{model_name} - Test predictions done")
        logger.info(f"Time elapsed: {time.time() - start_time:.2f} seconds")

        # Output predictions to a CSV file
        output_df = pd.DataFrame({'id': test_df_sub['id'], 'class': predictions})
        output_df.to_csv(f'Submission_01A_Dropped_Simple_{model_name}.csv', index=False)
        logger.info(f"Generated output file for {model_name}")
        logger.info(f"Time elapsed: {time.time() - start_time:.2f} seconds")

    except Exception as e:
        logger.error(f"An error occurred with {model_name}: {e}")
        logger.info(f"Time elapsed: {time.time() - start_time:.2f} seconds")

# Print completion message
print("Predictions for all five models have been saved to individual CSV files.")
logger.info("Predictions for all five models have been saved to individual CSV files.")
logger.info(f"Total Time elapsed: {time.time() - start_time:.2f} seconds")

INFO:__main__:Starting model training and cross-validation for Bagging Classifier
INFO:__main__:Fold 1: Train MCC = 0.998967, Validation MCC = 0.982903
INFO:__main__:Time elapsed: 4064.17 seconds


Fold 1: Train MCC = 0.998967, Validation MCC = 0.982903


INFO:__main__:Fold 2: Train MCC = 0.998962, Validation MCC = 0.982966
INFO:__main__:Time elapsed: 4633.79 seconds


Fold 2: Train MCC = 0.998962, Validation MCC = 0.982966


INFO:__main__:Fold 3: Train MCC = 0.998976, Validation MCC = 0.982578
INFO:__main__:Time elapsed: 5200.86 seconds


Fold 3: Train MCC = 0.998976, Validation MCC = 0.982578


INFO:__main__:Fold 4: Train MCC = 0.998988, Validation MCC = 0.982705
INFO:__main__:Time elapsed: 5761.81 seconds


Fold 4: Train MCC = 0.998988, Validation MCC = 0.982705


INFO:__main__:Fold 5: Train MCC = 0.998979, Validation MCC = 0.982979
INFO:__main__:Time elapsed: 6330.73 seconds
INFO:__main__:Mean Train MCC: 0.998974, Mean Validation MCC: 0.982826
INFO:__main__:Time elapsed: 6330.74 seconds
INFO:__main__:Bagging Classifier - Test predictions done
INFO:__main__:Time elapsed: 6330.87 seconds


Fold 5: Train MCC = 0.998979, Validation MCC = 0.982979

Mean Train MCC: 0.998974
Mean Validation MCC: 0.982826


INFO:__main__:Generated output file for Bagging Classifier
INFO:__main__:Time elapsed: 6333.49 seconds
INFO:__main__:Starting model training and cross-validation for Random Forest


KeyboardInterrupt: 

In [None]:
# Define MCC as the scoring metric
mcc_scorer = make_scorer(matthews_corrcoef)

# Perform cross-validation, fit on the entire training data, and predict for each model
predictions = {}
for model_name, model in five_best_models.items():
    try:
        logging.info(f"Performing cross-validation for {model_name}")
        scores = cross_val_score(model, train_df_transformed, y_train, cv=5, scoring=mcc_scorer, n_jobs=-1)
        logging.info(f"{model_name} - CV Scores: {scores}")
        logging.info(f"{model_name} - Mean CV Score: {np.mean(scores)}")
        logger.info(f"Time elapsed: {time.time() - start_time:.2f} seconds")

        # Fit the model on the entire training data
        model.fit(train_df_transformed, y_train)
    
        # Predict the output for test_df_transformed
        preds = model.predict(test_df_transformed)

        # Inverse transform the predictions to get the original class labels
        predictions[model_name] = label_encoder.inverse_transform(preds)

        # Log the prediction output
        logging.info(f"{model_name} - Test predictions done for {model_name}")
        logger.info(f"Time elapsed: {time.time() - start_time:.2f} seconds")

        # Output predictions to a CSV file
        output_df = pd.DataFrame({'id': test_df_sub['id'], 'class': predictions[model_name]})
        output_df.to_csv(f'Submission_01A(2)_Dropped_Simple_{model_name}.csv', index=False)
        print(output_df.head(2))
        logger.info(f"Generated output file - Time elapsed: {time.time() - start_time:.2f} seconds")

    except Exception as e:
        logger.error(f"An error occurred with {model_name}: {e}")
        logger.info(f"Time elapsed: {time.time() - start_time:.2f} seconds")

# Print completion message
print("Predictions for all five models have been saved to individual CSV files.")
print("Predictions for all five models have been saved to individual CSV files.")
logger.info(f"Time elapsed: {time.time() - start_time:.2f} seconds")