In [1]:
import pandas as pd

train_df = pd.read_csv('train.csv')
test_df= pd.read_csv('test.csv')

In [2]:
train_df.isna().sum()

ID                        0
Age                       0
Gender                    0
Education_Status          0
Employment_Status         0
Working_Week (Yearly)     0
Industry_Status           0
Occupation_Status         0
Race                      0
Hispanic_Origin           0
Martial_Status            0
Household_Status          0
Household_Summary         0
Citizenship               0
Birth_Country             0
Birth_Country (Father)    0
Birth_Country (Mother)    0
Tax_Status                0
Gains                     0
Losses                    0
Dividends                 0
Income_Status             0
Income                    0
dtype: int64

In [3]:
train_df['Birth_Country'].value_counts()

Birth_Country
US                              17825
Mexico                            540
Unknown                           330
Puerto-Rico                       117
Philippines                       112
Germany                            90
Canada                             75
El-Salvador                        68
Cuba                               58
India                              55
Dominican-Republic                 49
England                            47
Poland                             45
Jamaica                            45
Columbia                           36
Italy                              35
South Korea                        31
Vietnam                            31
Ecuador                            31
Japan                              30
Portugal                           29
Nicaragua                          28
China                              28
Guatemala                          28
Haiti                              25
Iran                               2

In [4]:
from sklearn.preprocessing import LabelEncoder

columns_to_encode = ["Employment_Status", "Industry_Status", "Occupation_Status", 
                     "Race", "Hispanic_Origin", "Martial_Status", "Household_Summary", 
                     "Citizenship", "Tax_Status","Income_Status","Education_Status","Birth_Country"]

# 각 열에 대해 LabelEncoder 객체 생성 및 변환 수행
for column in columns_to_encode:
    label_encoder = LabelEncoder()
    # train 데이터에 fit한 후 transform
    train_df[column] = label_encoder.fit_transform(train_df[column])
    # test 데이터에는 동일한 encoder를 사용하여 transform
    test_df[column] = label_encoder.transform(test_df[column])

# 변경된 데이터 확인
train_df.head(), test_df.head()


(            ID  Age Gender  Education_Status  Employment_Status  \
 0  TRAIN_00000   63      M                15                  2   
 1  TRAIN_00001   37      M                 1                  2   
 2  TRAIN_00002   58      F                12                  2   
 3  TRAIN_00003   44      M                12                  2   
 4  TRAIN_00004   37      F                12                  2   
 
    Working_Week (Yearly)  Industry_Status  Occupation_Status  Race  \
 0                      4               20                 11     4   
 1                     52                6                 11     4   
 2                     52               11                  0     1   
 3                     52               19                 12     4   
 4                     52               19                 10     4   
 
    Hispanic_Origin  ...  Citizenship Birth_Country  Birth_Country (Father)  \
 0                0  ...            2            39                      US   
 1  

In [5]:
# 성별 변수 변환: 남성('M')은 1, 여성('F')은 0
train_df['Gender'] = train_df['Gender'].map({'M': 1, 'F': 0})
test_df['Gender'] = test_df['Gender'].map({'M': 1, 'F': 0})

# 변환 후 데이터 확인
train_df.head(), test_df.head()

(            ID  Age  Gender  Education_Status  Employment_Status  \
 0  TRAIN_00000   63       1                15                  2   
 1  TRAIN_00001   37       1                 1                  2   
 2  TRAIN_00002   58       0                12                  2   
 3  TRAIN_00003   44       1                12                  2   
 4  TRAIN_00004   37       0                12                  2   
 
    Working_Week (Yearly)  Industry_Status  Occupation_Status  Race  \
 0                      4               20                 11     4   
 1                     52                6                 11     4   
 2                     52               11                  0     1   
 3                     52               19                 12     4   
 4                     52               19                 10     4   
 
    Hispanic_Origin  ...  Citizenship Birth_Country  Birth_Country (Father)  \
 0                0  ...            2            39                      US  

In [6]:
import numpy as np

# 새로운 변수 생성
train_df['Age_Working_Week'] = train_df['Age'] + train_df['Working_Week (Yearly)']
train_df['Gains_Losses'] = train_df['Gains'] - train_df['Losses']
train_df['Age_Dividends'] = train_df['Age'] * train_df['Dividends']
train_df['Age_Losses'] = train_df['Age'] - train_df['Losses']
train_df['Working_Week_Dividends'] = train_df['Working_Week (Yearly)'] * train_df['Dividends']
train_df['Gains_Dividends'] = train_df['Gains'] * train_df['Dividends']
train_df['Working_Week_Losses'] = train_df['Working_Week (Yearly)'] - train_df['Losses']
train_df['Age_Working_Week_Gains'] = train_df['Age'] + train_df['Working_Week (Yearly)'] * train_df['Gains']
train_df['Working_Week_Gains_Losses'] = train_df['Working_Week (Yearly)'] * train_df['Gains'] - train_df['Losses']




# test 데이터셋에도 같은 작업을 수행
test_df['Age_Working_Week'] = test_df['Age'] + test_df['Working_Week (Yearly)']
test_df['Gains_Losses'] = test_df['Gains'] - test_df['Losses']
test_df['Age_Dividends'] = test_df['Age'] * test_df['Dividends']
test_df['Age_Losses'] = test_df['Age'] - test_df['Losses']
test_df['Working_Week_Dividends'] = test_df['Working_Week (Yearly)'] * test_df['Dividends']
test_df['Gains_Dividends'] = test_df['Gains'] * test_df['Dividends']
test_df['Working_Week_Losses'] = test_df['Working_Week (Yearly)'] - test_df['Losses']
test_df['Age_Working_Week_Gains'] = test_df['Age'] + test_df['Working_Week (Yearly)'] * test_df['Gains']
test_df['Working_Week_Gains_Losses'] = test_df['Working_Week (Yearly)'] * test_df['Gains'] - test_df['Losses']




In [7]:
# train_df에서 Income 변수를 y_train으로 할당
y_train = train_df['Income']

# train_df에서 ID, Household_Status, Income 변수 제거
train_df.drop(columns=['ID', 'Household_Status', 'Income','Birth_Country (Father)','Birth_Country (Mother)'], inplace=True)

# test_df에서 ID, Household_Status 변수 제거
test_df.drop(columns=['ID', 'Household_Status','Birth_Country (Father)','Birth_Country (Mother)'], inplace=True)

In [8]:
train_df.isna().sum()

Age                          0
Gender                       0
Education_Status             0
Employment_Status            0
Working_Week (Yearly)        0
Industry_Status              0
Occupation_Status            0
Race                         0
Hispanic_Origin              0
Martial_Status               0
Household_Summary            0
Citizenship                  0
Birth_Country                0
Tax_Status                   0
Gains                        0
Losses                       0
Dividends                    0
Income_Status                0
Age_Working_Week             0
Gains_Losses                 0
Age_Dividends                0
Age_Losses                   0
Working_Week_Dividends       0
Gains_Dividends              0
Working_Week_Losses          0
Age_Working_Week_Gains       0
Working_Week_Gains_Losses    0
dtype: int64

In [9]:
train_df.columns

Index(['Age', 'Gender', 'Education_Status', 'Employment_Status',
       'Working_Week (Yearly)', 'Industry_Status', 'Occupation_Status', 'Race',
       'Hispanic_Origin', 'Martial_Status', 'Household_Summary', 'Citizenship',
       'Birth_Country', 'Tax_Status', 'Gains', 'Losses', 'Dividends',
       'Income_Status', 'Age_Working_Week', 'Gains_Losses', 'Age_Dividends',
       'Age_Losses', 'Working_Week_Dividends', 'Gains_Dividends',
       'Working_Week_Losses', 'Age_Working_Week_Gains',
       'Working_Week_Gains_Losses'],
      dtype='object')

In [93]:
from lightgbm import LGBMRegressor
from sklearn.model_selection import train_test_split
import optuna
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
from sklearn.ensemble import GradientBoostingRegressor
from catboost import CatBoostRegressor
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor

def objective(trial, X_train, X_valid, y_train, y_valid):
    lgb_params = {
        'num_leaves': trial.suggest_int('lgb_num_leaves', 10, 1000),
        'learning_rate': trial.suggest_uniform('lgb_learning_rate', 0.01, 0.1),
        'n_estimators': trial.suggest_int('lgb_n_estimators', 50, 500),
        'subsample': trial.suggest_uniform('lgb_subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_uniform('lgb_colsample_bytree', 0.5, 1.0),
        'reg_alpha': trial.suggest_uniform('lgb_reg_alpha', 0.0, 1.0),
        'reg_lambda': trial.suggest_uniform('lgb_reg_lambda', 0.0, 1.0),
        'min_child_samples': trial.suggest_int('lgb_min_child_samples', 1, 100)
    }

    gb_params = {
        'n_estimators': trial.suggest_int('gb_n_estimators', 50, 1000),
        'learning_rate': trial.suggest_uniform('gb_learning_rate', 0.01, 0.1),
        'max_depth': trial.suggest_int('gb_max_depth', 3, 20),
        'min_samples_split': trial.suggest_int('gb_min_samples_split', 2, 20),
        'min_samples_leaf': trial.suggest_int('gb_min_samples_leaf', 1, 20),
    }

    cat_params = {
        'learning_rate': trial.suggest_uniform('cat_learning_rate', 0.01, 0.5),
        'iterations': trial.suggest_int('cat_iterations', 50, 1000),
        'depth': trial.suggest_int('cat_depth', 3, 10),
        'l2_leaf_reg': trial.suggest_uniform('cat_l2_leaf_reg', 0.1, 10.0),
        'random_strength': trial.suggest_uniform('cat_random_strength', 0.1, 10.0),
        'bagging_temperature': trial.suggest_uniform('cat_bagging_temperature', 0.0, 1.0),
        'border_count': trial.suggest_int('cat_border_count', 1, 255),
        'cat_features': [0, 1, 2, 9, 10, 11, 12, 13, 14, 15]
    }

    hgb_params = {
        'learning_rate': trial.suggest_uniform('hgb_learning_rate', 0.01, 0.5),
        'max_iter': trial.suggest_int('hgb_max_iter', 100, 1000),
        'max_depth': trial.suggest_int('hgb_max_depth', 3, 20),
        'min_samples_leaf': trial.suggest_int('hgb_min_samples_leaf', 1, 20),
        'l2_regularization': trial.suggest_uniform('hgb_l2_regularization', 0.1, 1.0)
    }

    # Initialize models with suggested parameters
    lgbm_model = lgb.LGBMRegressor(**lgb_params)
    gb_model = GradientBoostingRegressor(**gb_params)
    cat_model = CatBoostRegressor(**cat_params)
    hgb_model = HistGradientBoostingRegressor(categorical_features=[0, 1, 2, 9, 10, 11, 12, 13], **hgb_params)

    # Fit the models
    lgbm_model.fit(X_train, y_train)
    gb_model.fit(X_train, y_train)
    cat_model.fit(X_train, y_train)
    hgb_model.fit(X_train, y_train)

    # Predict
    lgbm_pred = lgbm_model.predict(X_valid)
    gb_pred = gb_model.predict(X_valid)
    cat_pred = cat_model.predict(X_valid)
    hgb_pred = hgb_model.predict(X_valid)

    # Calculate mean squared error
    lgbm_score = mean_squared_error(y_valid, lgbm_pred)
    gb_score = mean_squared_error(y_valid, gb_pred)
    cat_score = mean_squared_error(y_valid, cat_pred)
    hgb_score = mean_squared_error(y_valid, hgb_pred)

    # Average the scores
    avg_score = (lgbm_score + gb_score + cat_score + hgb_score) / 4

    return avg_score




In [94]:
# Split the data into training and validation sets
X_train, X_valid, y_train, y_valid = train_test_split(train_df, y_train, test_size=0.2, random_state=42)

# Create a study object and optimize the objective function
study = optuna.create_study(direction='minimize')
study.optimize(lambda trial: objective(trial, X_train, X_valid, y_train, y_valid), n_trials=10)

# Get the best hyperparameters
best_params = study.best_params


[I 2024-04-05 23:23:16,311] A new study created in memory with name: no-name-25cc7329-bc91-4375-80bc-116cd0e8abd5
  'learning_rate': trial.suggest_uniform('lgb_learning_rate', 0.01, 0.1),
  'subsample': trial.suggest_uniform('lgb_subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('lgb_colsample_bytree', 0.5, 1.0),
  'reg_alpha': trial.suggest_uniform('lgb_reg_alpha', 0.0, 1.0),
  'reg_lambda': trial.suggest_uniform('lgb_reg_lambda', 0.0, 1.0),
  'learning_rate': trial.suggest_uniform('gb_learning_rate', 0.01, 0.1),
  'learning_rate': trial.suggest_uniform('cat_learning_rate', 0.01, 0.5),
  'l2_leaf_reg': trial.suggest_uniform('cat_l2_leaf_reg', 0.1, 10.0),
  'random_strength': trial.suggest_uniform('cat_random_strength', 0.1, 10.0),
  'bagging_temperature': trial.suggest_uniform('cat_bagging_temperature', 0.0, 1.0),
  'learning_rate': trial.suggest_uniform('hgb_learning_rate', 0.01, 0.5),
  'l2_regularization': trial.suggest_uniform('hgb_l2_regularization', 0.1, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000766 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1998
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 27
[LightGBM] [Info] Start training from score 556.153500






0:	learn: 657.3211469	total: 32.5ms	remaining: 9.25s
1:	learn: 640.1128404	total: 53.5ms	remaining: 7.59s
2:	learn: 631.1876772	total: 73.8ms	remaining: 6.96s
3:	learn: 625.5415123	total: 92.9ms	remaining: 6.55s
4:	learn: 624.2303556	total: 114ms	remaining: 6.38s
5:	learn: 621.8695884	total: 134ms	remaining: 6.26s
6:	learn: 620.7953544	total: 156ms	remaining: 6.2s
7:	learn: 620.1186827	total: 176ms	remaining: 6.1s
8:	learn: 618.2056365	total: 200ms	remaining: 6.14s
9:	learn: 617.0287165	total: 228ms	remaining: 6.28s
10:	learn: 616.6614808	total: 252ms	remaining: 6.29s
11:	learn: 616.0621383	total: 272ms	remaining: 6.21s
12:	learn: 615.3054317	total: 298ms	remaining: 6.26s
13:	learn: 615.1250994	total: 326ms	remaining: 6.33s
14:	learn: 615.1250974	total: 335ms	remaining: 6.05s
15:	learn: 614.5887848	total: 358ms	remaining: 6.05s
16:	learn: 613.5678418	total: 380ms	remaining: 6.01s
17:	learn: 613.5377221	total: 400ms	remaining: 5.95s
18:	learn: 613.3509397	total: 420ms	remaining: 5.9s
19

145:	learn: 578.9844081	total: 3.53s	remaining: 3.38s
146:	learn: 578.9569750	total: 3.55s	remaining: 3.36s
147:	learn: 578.7676737	total: 3.58s	remaining: 3.34s
148:	learn: 578.7450961	total: 3.62s	remaining: 3.32s
149:	learn: 578.7412895	total: 3.65s	remaining: 3.31s
150:	learn: 578.7390279	total: 3.68s	remaining: 3.29s
151:	learn: 578.7060641	total: 3.69s	remaining: 3.25s
152:	learn: 578.3340195	total: 3.71s	remaining: 3.22s
153:	learn: 578.3269689	total: 3.73s	remaining: 3.2s
154:	learn: 578.3268394	total: 3.77s	remaining: 3.18s
155:	learn: 578.2273831	total: 3.8s	remaining: 3.16s
156:	learn: 578.1378556	total: 3.83s	remaining: 3.15s
157:	learn: 578.1369667	total: 3.85s	remaining: 3.12s
158:	learn: 577.7536550	total: 3.87s	remaining: 3.09s
159:	learn: 577.7070153	total: 3.9s	remaining: 3.07s
160:	learn: 577.6720405	total: 3.92s	remaining: 3.04s
161:	learn: 577.6654995	total: 3.95s	remaining: 3.02s
162:	learn: 577.6650608	total: 3.96s	remaining: 2.99s
163:	learn: 577.5445226	total: 

[I 2024-04-05 23:24:05,247] Trial 0 finished with value: 366809.85824867815 and parameters: {'lgb_num_leaves': 927, 'lgb_learning_rate': 0.06277126975427344, 'lgb_n_estimators': 389, 'lgb_subsample': 0.9072622348623611, 'lgb_colsample_bytree': 0.902017705122953, 'lgb_reg_alpha': 0.6555760129414254, 'lgb_reg_lambda': 0.349361583779894, 'lgb_min_child_samples': 87, 'gb_n_estimators': 257, 'gb_learning_rate': 0.03183956138876816, 'gb_max_depth': 17, 'gb_min_samples_split': 9, 'gb_min_samples_leaf': 13, 'cat_learning_rate': 0.42297571181038973, 'cat_iterations': 286, 'cat_depth': 3, 'cat_l2_leaf_reg': 8.652099651136103, 'cat_random_strength': 4.21864934181204, 'cat_bagging_temperature': 0.8000964962587127, 'cat_border_count': 226, 'hgb_learning_rate': 0.36075794380555576, 'hgb_max_iter': 352, 'hgb_max_depth': 16, 'hgb_min_samples_leaf': 4, 'hgb_l2_regularization': 0.46509127553135277}. Best is trial 0 with value: 366809.85824867815.
  'learning_rate': trial.suggest_uniform('lgb_learning_ra

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000806 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1998
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 27
[LightGBM] [Info] Start training from score 556.153500
0:	learn: 677.7659112	total: 78.8ms	remaining: 11.1s
1:	learn: 659.9527167	total: 156ms	remaining: 10.9s
2:	learn: 646.1738730	total: 219ms	remaining: 10.2s
3:	learn: 634.5727911	total: 282ms	remaining: 9.73s
4:	learn: 625.2535189	total: 340ms	remaining: 9.31s
5:	learn: 617.7927811	total: 400ms	remaining: 9.05s
6:	learn: 612.1539748	total: 464ms	remaining: 8.95s
7:	learn: 607.7137702	total: 537ms	remaining: 8.99s
8:	learn: 603.1688943	total: 624ms	remaining: 9.22s
9:	learn: 599.6891556	total: 715ms	remaining: 9.44s
10:	learn: 597.0924251	total: 785ms	remaining: 9.35s
11:	learn: 594.007

[I 2024-04-05 23:25:23,909] Trial 1 finished with value: 370879.9052907756 and parameters: {'lgb_num_leaves': 109, 'lgb_learning_rate': 0.0599255597775983, 'lgb_n_estimators': 159, 'lgb_subsample': 0.8066469189772345, 'lgb_colsample_bytree': 0.5798351663423659, 'lgb_reg_alpha': 0.4335250727850197, 'lgb_reg_lambda': 0.926470539160704, 'lgb_min_child_samples': 67, 'gb_n_estimators': 904, 'gb_learning_rate': 0.08791982512756351, 'gb_max_depth': 7, 'gb_min_samples_split': 3, 'gb_min_samples_leaf': 1, 'cat_learning_rate': 0.1393140844687003, 'cat_iterations': 142, 'cat_depth': 10, 'cat_l2_leaf_reg': 7.997604687420977, 'cat_random_strength': 0.2622589128334079, 'cat_bagging_temperature': 0.15459380027236258, 'cat_border_count': 84, 'hgb_learning_rate': 0.43407591907421433, 'hgb_max_iter': 531, 'hgb_max_depth': 16, 'hgb_min_samples_leaf': 4, 'hgb_l2_regularization': 0.6604847319133003}. Best is trial 0 with value: 366809.85824867815.
  'learning_rate': trial.suggest_uniform('lgb_learning_rate

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001174 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1998
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 27
[LightGBM] [Info] Start training from score 556.153500
0:	learn: 672.0718897	total: 27.6ms	remaining: 7.61s
1:	learn: 652.1942585	total: 57.4ms	remaining: 7.89s
2:	learn: 644.7470736	total: 78.7ms	remaining: 7.19s
3:	learn: 640.7778108	total: 106ms	remaining: 7.25s
4:	learn: 636.9992035	total: 130ms	remaining: 7.05s
5:	learn: 635.3453774	total: 154ms	remaining: 6.95s
6:	learn: 631.2050833	total: 183ms	remaining: 7.04s
7:	learn: 629.0870674	total: 211ms	remaining: 7.09s
8:	learn: 625.2462107	total: 238ms	remaining: 7.08s
9:	learn: 624.7895463	total: 262ms	remaining: 6.99s
10:	learn: 623.0702701	total: 290ms	remaining: 7s
11:	learn: 621.6214

149:	learn: 578.3091137	total: 4.85s	remaining: 4.1s
150:	learn: 578.2358362	total: 4.88s	remaining: 4.07s
151:	learn: 578.1794077	total: 4.92s	remaining: 4.04s
152:	learn: 578.0936166	total: 4.95s	remaining: 4.01s
153:	learn: 577.9220872	total: 4.99s	remaining: 3.99s
154:	learn: 577.9098971	total: 5.03s	remaining: 3.96s
155:	learn: 577.8252934	total: 5.07s	remaining: 3.93s
156:	learn: 577.6839739	total: 5.11s	remaining: 3.9s
157:	learn: 577.5050584	total: 5.14s	remaining: 3.87s
158:	learn: 577.3600018	total: 5.18s	remaining: 3.84s
159:	learn: 577.2156526	total: 5.22s	remaining: 3.82s
160:	learn: 576.9480967	total: 5.25s	remaining: 3.79s
161:	learn: 576.9401303	total: 5.29s	remaining: 3.75s
162:	learn: 576.5759331	total: 5.32s	remaining: 3.72s
163:	learn: 576.3284775	total: 5.36s	remaining: 3.69s
164:	learn: 576.2274299	total: 5.39s	remaining: 3.66s
165:	learn: 576.1723335	total: 5.42s	remaining: 3.63s
166:	learn: 576.1267766	total: 5.46s	remaining: 3.59s
167:	learn: 576.1011157	total:

[I 2024-04-05 23:26:04,916] Trial 2 finished with value: 358544.1850521039 and parameters: {'lgb_num_leaves': 80, 'lgb_learning_rate': 0.08856391656202889, 'lgb_n_estimators': 138, 'lgb_subsample': 0.7938571236989019, 'lgb_colsample_bytree': 0.9016202740584827, 'lgb_reg_alpha': 0.96095800250082, 'lgb_reg_lambda': 0.5220401485447594, 'lgb_min_child_samples': 21, 'gb_n_estimators': 987, 'gb_learning_rate': 0.06899495748113199, 'gb_max_depth': 3, 'gb_min_samples_split': 12, 'gb_min_samples_leaf': 3, 'cat_learning_rate': 0.2806971986577452, 'cat_iterations': 277, 'cat_depth': 4, 'cat_l2_leaf_reg': 9.706635385160684, 'cat_random_strength': 7.351833081184607, 'cat_bagging_temperature': 0.8959051902105583, 'cat_border_count': 20, 'hgb_learning_rate': 0.09000242826110565, 'hgb_max_iter': 267, 'hgb_max_depth': 7, 'hgb_min_samples_leaf': 8, 'hgb_l2_regularization': 0.9394399140092757}. Best is trial 2 with value: 358544.1850521039.
  'learning_rate': trial.suggest_uniform('lgb_learning_rate', 0.

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001272 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1998
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 27
[LightGBM] [Info] Start training from score 556.153500
0:	learn: 653.6934861	total: 35.9ms	remaining: 6.15s
1:	learn: 636.9151706	total: 73.7ms	remaining: 6.26s
2:	learn: 626.3062669	total: 101ms	remaining: 5.69s
3:	learn: 624.2349615	total: 145ms	remaining: 6.09s
4:	learn: 619.0426427	total: 185ms	remaining: 6.19s
5:	learn: 615.1793914	total: 226ms	remaining: 6.24s
6:	learn: 615.0663051	total: 249ms	remaining: 5.86s
7:	learn: 613.5921360	total: 288ms	remaining: 5.89s
8:	learn: 612.7890160	total: 334ms	remaining: 6.05s
9:	learn: 612.0596576	total: 358ms	remaining: 5.79s
10:	learn: 611.4983028	total: 413ms	remaining: 6.05s
11:	learn: 611.02

149:	learn: 523.8288622	total: 6.07s	remaining: 890ms
150:	learn: 523.7959743	total: 6.11s	remaining: 849ms
151:	learn: 523.7776060	total: 6.15s	remaining: 809ms
152:	learn: 522.7261618	total: 6.21s	remaining: 771ms
153:	learn: 522.7119246	total: 6.26s	remaining: 732ms
154:	learn: 522.6865719	total: 6.3s	remaining: 692ms
155:	learn: 521.9581604	total: 6.34s	remaining: 650ms
156:	learn: 521.1632075	total: 6.38s	remaining: 610ms
157:	learn: 520.1043229	total: 6.42s	remaining: 569ms
158:	learn: 520.0828372	total: 6.46s	remaining: 528ms
159:	learn: 519.4183189	total: 6.49s	remaining: 487ms
160:	learn: 519.1775524	total: 6.53s	remaining: 446ms
161:	learn: 519.1640541	total: 6.58s	remaining: 406ms
162:	learn: 519.0984432	total: 6.63s	remaining: 366ms
163:	learn: 519.0661860	total: 6.67s	remaining: 326ms
164:	learn: 518.8230442	total: 6.71s	remaining: 285ms
165:	learn: 517.8941007	total: 6.75s	remaining: 244ms
166:	learn: 517.8236670	total: 6.78s	remaining: 203ms
167:	learn: 517.2444341	total

[I 2024-04-05 23:26:22,339] Trial 3 finished with value: 364043.0411894186 and parameters: {'lgb_num_leaves': 354, 'lgb_learning_rate': 0.08386425003308688, 'lgb_n_estimators': 163, 'lgb_subsample': 0.7768604468777482, 'lgb_colsample_bytree': 0.9196992139087754, 'lgb_reg_alpha': 0.345802253548703, 'lgb_reg_lambda': 0.2331530845099491, 'lgb_min_child_samples': 10, 'gb_n_estimators': 118, 'gb_learning_rate': 0.05144582060782069, 'gb_max_depth': 7, 'gb_min_samples_split': 8, 'gb_min_samples_leaf': 9, 'cat_learning_rate': 0.46471436486916395, 'cat_iterations': 172, 'cat_depth': 7, 'cat_l2_leaf_reg': 8.820071098400975, 'cat_random_strength': 5.8893429007436, 'cat_bagging_temperature': 0.10778119834014455, 'cat_border_count': 151, 'hgb_learning_rate': 0.028204925153538687, 'hgb_max_iter': 174, 'hgb_max_depth': 17, 'hgb_min_samples_leaf': 20, 'hgb_l2_regularization': 0.3245360867443586}. Best is trial 2 with value: 358544.1850521039.
  'learning_rate': trial.suggest_uniform('lgb_learning_rate

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000921 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1998
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 27
[LightGBM] [Info] Start training from score 556.153500


0:	learn: 673.2064308	total: 38.5ms	remaining: 34.7s
1:	learn: 652.2880826	total: 71.4ms	remaining: 32.2s
2:	learn: 640.5800618	total: 92.9ms	remaining: 27.9s
3:	learn: 633.8625759	total: 138ms	remaining: 31s
4:	learn: 628.4203307	total: 209ms	remaining: 37.5s
5:	learn: 624.6154163	total: 315ms	remaining: 47.1s
6:	learn: 619.8979749	total: 368ms	remaining: 47.1s
7:	learn: 617.0405986	total: 505ms	remaining: 56.5s
8:	learn: 614.7192404	total: 650ms	remaining: 1m 4s
9:	learn: 611.0380806	total: 768ms	remaining: 1m 8s
10:	learn: 609.6808078	total: 891ms	remaining: 1m 12s
11:	learn: 609.1166700	total: 920ms	remaining: 1m 8s
12:	learn: 606.9748940	total: 1.08s	remaining: 1m 14s
13:	learn: 606.5784552	total: 1.1s	remaining: 1m 10s
14:	learn: 606.0099013	total: 1.2s	remaining: 1m 10s
15:	learn: 605.6466751	total: 1.26s	remaining: 1m 10s
16:	learn: 605.4560022	total: 1.28s	remaining: 1m 7s
17:	learn: 604.4760590	total: 1.45s	remaining: 1m 11s
18:	learn: 601.6543636	total: 1.59s	remaining: 1m 1

153:	learn: 494.8709169	total: 18.4s	remaining: 1m 29s
154:	learn: 493.9659305	total: 18.5s	remaining: 1m 29s
155:	learn: 493.6564923	total: 18.6s	remaining: 1m 29s
156:	learn: 493.0335891	total: 18.8s	remaining: 1m 29s
157:	learn: 492.8388420	total: 18.9s	remaining: 1m 29s
158:	learn: 492.4768211	total: 19.1s	remaining: 1m 29s
159:	learn: 492.1050671	total: 19.2s	remaining: 1m 29s
160:	learn: 491.3784977	total: 19.3s	remaining: 1m 28s
161:	learn: 491.1778997	total: 19.4s	remaining: 1m 28s
162:	learn: 490.4556221	total: 19.6s	remaining: 1m 28s
163:	learn: 490.0410702	total: 19.7s	remaining: 1m 28s
164:	learn: 489.4577371	total: 19.9s	remaining: 1m 29s
165:	learn: 489.4516771	total: 20s	remaining: 1m 28s
166:	learn: 489.3546350	total: 20.1s	remaining: 1m 28s
167:	learn: 488.6241166	total: 20.2s	remaining: 1m 28s
168:	learn: 487.2173184	total: 20.3s	remaining: 1m 28s
169:	learn: 487.0463378	total: 20.4s	remaining: 1m 28s
170:	learn: 486.9538893	total: 20.6s	remaining: 1m 28s
171:	learn: 

304:	learn: 435.7826437	total: 38.4s	remaining: 1m 15s
305:	learn: 435.4206109	total: 38.5s	remaining: 1m 15s
306:	learn: 434.2917602	total: 38.7s	remaining: 1m 15s
307:	learn: 432.9937592	total: 38.8s	remaining: 1m 14s
308:	learn: 432.9201954	total: 39s	remaining: 1m 14s
309:	learn: 431.5279460	total: 39.1s	remaining: 1m 14s
310:	learn: 431.4872522	total: 39.3s	remaining: 1m 14s
311:	learn: 430.1613254	total: 39.4s	remaining: 1m 14s
312:	learn: 430.0063414	total: 39.5s	remaining: 1m 14s
313:	learn: 428.9962946	total: 39.7s	remaining: 1m 14s
314:	learn: 428.9054491	total: 39.8s	remaining: 1m 14s
315:	learn: 427.4113782	total: 39.9s	remaining: 1m 14s
316:	learn: 426.6957668	total: 40.1s	remaining: 1m 14s
317:	learn: 426.5526261	total: 40.2s	remaining: 1m 13s
318:	learn: 425.3730065	total: 40.3s	remaining: 1m 13s
319:	learn: 424.7231601	total: 40.5s	remaining: 1m 13s
320:	learn: 424.6215685	total: 40.6s	remaining: 1m 13s
321:	learn: 424.4153456	total: 40.8s	remaining: 1m 13s
322:	learn: 

456:	learn: 368.2194588	total: 59.1s	remaining: 57.7s
457:	learn: 368.0909155	total: 59.2s	remaining: 57.6s
458:	learn: 368.0016800	total: 59.4s	remaining: 57.5s
459:	learn: 367.6405888	total: 59.5s	remaining: 57.3s
460:	learn: 367.5727323	total: 59.7s	remaining: 57.2s
461:	learn: 366.6805303	total: 59.8s	remaining: 57.1s
462:	learn: 366.5048707	total: 60s	remaining: 57s
463:	learn: 365.8219025	total: 1m	remaining: 56.8s
464:	learn: 364.2308904	total: 1m	remaining: 56.7s
465:	learn: 364.1446155	total: 1m	remaining: 56.6s
466:	learn: 363.9766962	total: 1m	remaining: 56.4s
467:	learn: 363.7580934	total: 1m	remaining: 56.3s
468:	learn: 362.9742327	total: 1m	remaining: 56.2s
469:	learn: 362.6872725	total: 1m	remaining: 56.1s
470:	learn: 362.6450040	total: 1m 1s	remaining: 56s
471:	learn: 362.2532077	total: 1m 1s	remaining: 55.9s
472:	learn: 361.6462415	total: 1m 1s	remaining: 55.7s
473:	learn: 360.8575312	total: 1m 1s	remaining: 55.6s
474:	learn: 360.8414790	total: 1m 1s	remaining: 55.5s
4

609:	learn: 306.3166783	total: 1m 19s	remaining: 38.3s
610:	learn: 306.2669026	total: 1m 19s	remaining: 38.1s
611:	learn: 305.5539999	total: 1m 19s	remaining: 38s
612:	learn: 305.5287015	total: 1m 20s	remaining: 37.9s
613:	learn: 305.1110328	total: 1m 20s	remaining: 37.7s
614:	learn: 304.7690854	total: 1m 20s	remaining: 37.6s
615:	learn: 304.5831841	total: 1m 20s	remaining: 37.5s
616:	learn: 304.4506302	total: 1m 20s	remaining: 37.3s
617:	learn: 304.1021808	total: 1m 20s	remaining: 37.2s
618:	learn: 303.7165509	total: 1m 20s	remaining: 37.1s
619:	learn: 303.3102371	total: 1m 21s	remaining: 37s
620:	learn: 303.1293371	total: 1m 21s	remaining: 36.8s
621:	learn: 303.1125545	total: 1m 21s	remaining: 36.7s
622:	learn: 302.9657284	total: 1m 21s	remaining: 36.6s
623:	learn: 302.5433100	total: 1m 21s	remaining: 36.4s
624:	learn: 302.5147313	total: 1m 21s	remaining: 36.3s
625:	learn: 301.7972517	total: 1m 21s	remaining: 36.2s
626:	learn: 301.4080420	total: 1m 21s	remaining: 36s
627:	learn: 300.

760:	learn: 265.2159483	total: 1m 39s	remaining: 18.6s
761:	learn: 265.0256701	total: 1m 39s	remaining: 18.5s
762:	learn: 264.9910124	total: 1m 40s	remaining: 18.4s
763:	learn: 264.9117155	total: 1m 40s	remaining: 18.2s
764:	learn: 264.7232678	total: 1m 40s	remaining: 18.1s
765:	learn: 264.4808227	total: 1m 40s	remaining: 18s
766:	learn: 264.1655701	total: 1m 40s	remaining: 17.8s
767:	learn: 264.1423657	total: 1m 40s	remaining: 17.7s
768:	learn: 264.0657841	total: 1m 40s	remaining: 17.6s
769:	learn: 263.7408742	total: 1m 40s	remaining: 17.4s
770:	learn: 263.0696486	total: 1m 41s	remaining: 17.3s
771:	learn: 263.0112210	total: 1m 41s	remaining: 17.2s
772:	learn: 262.6991554	total: 1m 41s	remaining: 17s
773:	learn: 262.0648521	total: 1m 41s	remaining: 16.9s
774:	learn: 262.0352269	total: 1m 41s	remaining: 16.8s
775:	learn: 261.9527389	total: 1m 41s	remaining: 16.6s
776:	learn: 261.8496351	total: 1m 41s	remaining: 16.5s
777:	learn: 261.7974854	total: 1m 41s	remaining: 16.4s
778:	learn: 26

[I 2024-04-05 23:28:25,892] Trial 4 finished with value: 361371.2839001418 and parameters: {'lgb_num_leaves': 265, 'lgb_learning_rate': 0.0646148744211565, 'lgb_n_estimators': 110, 'lgb_subsample': 0.9085774022687747, 'lgb_colsample_bytree': 0.8237609683036989, 'lgb_reg_alpha': 0.3092342715901859, 'lgb_reg_lambda': 0.5577862581807392, 'lgb_min_child_samples': 60, 'gb_n_estimators': 51, 'gb_learning_rate': 0.05200495557446427, 'gb_max_depth': 6, 'gb_min_samples_split': 4, 'gb_min_samples_leaf': 17, 'cat_learning_rate': 0.25183617757110655, 'cat_iterations': 903, 'cat_depth': 10, 'cat_l2_leaf_reg': 6.401381578174114, 'cat_random_strength': 4.893483165332388, 'cat_bagging_temperature': 0.42764719088566605, 'cat_border_count': 84, 'hgb_learning_rate': 0.15562598125714566, 'hgb_max_iter': 272, 'hgb_max_depth': 14, 'hgb_min_samples_leaf': 5, 'hgb_l2_regularization': 0.852195715154271}. Best is trial 2 with value: 358544.1850521039.
  'learning_rate': trial.suggest_uniform('lgb_learning_rate'

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001373 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1998
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 27
[LightGBM] [Info] Start training from score 556.153500
0:	learn: 676.1575387	total: 32.9ms	remaining: 12.1s
1:	learn: 659.0017923	total: 65.6ms	remaining: 12.1s
2:	learn: 645.3817598	total: 96.9ms	remaining: 11.9s
3:	learn: 636.9065056	total: 131ms	remaining: 12s
4:	learn: 633.7859652	total: 171ms	remaining: 12.5s
5:	learn: 628.8999633	total: 204ms	remaining: 12.4s
6:	learn: 624.6410077	total: 235ms	remaining: 12.2s
7:	learn: 622.3701429	total: 276ms	remaining: 12.5s
8:	learn: 621.2020983	total: 314ms	remaining: 12.6s
9:	learn: 620.2653617	total: 351ms	remaining: 12.6s
10:	learn: 618.8708921	total: 382ms	remaining: 12.5s
11:	learn: 617.110

151:	learn: 563.0734504	total: 5.64s	remaining: 8.1s
152:	learn: 563.0487520	total: 5.67s	remaining: 8.04s
153:	learn: 562.9238514	total: 5.7s	remaining: 7.99s
154:	learn: 562.8568490	total: 5.73s	remaining: 7.95s
155:	learn: 562.7738118	total: 5.77s	remaining: 7.91s
156:	learn: 562.2613023	total: 5.8s	remaining: 7.88s
157:	learn: 562.2342072	total: 5.84s	remaining: 7.83s
158:	learn: 562.0998842	total: 5.87s	remaining: 7.8s
159:	learn: 562.0690917	total: 5.91s	remaining: 7.75s
160:	learn: 561.9567620	total: 5.94s	remaining: 7.71s
161:	learn: 561.8632719	total: 5.97s	remaining: 7.66s
162:	learn: 561.7764019	total: 6s	remaining: 7.61s
163:	learn: 561.0992182	total: 6.03s	remaining: 7.57s
164:	learn: 561.0574984	total: 6.06s	remaining: 7.53s
165:	learn: 560.9802964	total: 6.09s	remaining: 7.49s
166:	learn: 560.8778231	total: 6.12s	remaining: 7.44s
167:	learn: 560.7789298	total: 6.15s	remaining: 7.39s
168:	learn: 560.7308523	total: 6.17s	remaining: 7.34s
169:	learn: 560.6542962	total: 6.21

306:	learn: 542.1506775	total: 11.3s	remaining: 2.32s
307:	learn: 542.0640840	total: 11.3s	remaining: 2.28s
308:	learn: 542.0538285	total: 11.4s	remaining: 2.24s
309:	learn: 542.0288643	total: 11.4s	remaining: 2.21s
310:	learn: 541.9918245	total: 11.4s	remaining: 2.17s
311:	learn: 541.9216764	total: 11.5s	remaining: 2.14s
312:	learn: 541.8889824	total: 11.5s	remaining: 2.1s
313:	learn: 541.8016481	total: 11.6s	remaining: 2.07s
314:	learn: 541.7815631	total: 11.6s	remaining: 2.03s
315:	learn: 541.7496302	total: 11.7s	remaining: 1.99s
316:	learn: 541.5397919	total: 11.7s	remaining: 1.96s
317:	learn: 541.3383115	total: 11.7s	remaining: 1.92s
318:	learn: 541.1491597	total: 11.8s	remaining: 1.88s
319:	learn: 540.8951065	total: 11.8s	remaining: 1.84s
320:	learn: 540.8479343	total: 11.8s	remaining: 1.8s
321:	learn: 540.5570984	total: 11.9s	remaining: 1.77s
322:	learn: 540.5162535	total: 11.9s	remaining: 1.73s
323:	learn: 540.4806077	total: 11.9s	remaining: 1.69s
324:	learn: 540.4495978	total:

[I 2024-04-05 23:28:46,818] Trial 5 finished with value: 371354.74543901626 and parameters: {'lgb_num_leaves': 201, 'lgb_learning_rate': 0.07775547311506202, 'lgb_n_estimators': 446, 'lgb_subsample': 0.7995146883837987, 'lgb_colsample_bytree': 0.8486507968309266, 'lgb_reg_alpha': 0.2578654168021304, 'lgb_reg_lambda': 0.9514629791872714, 'lgb_min_child_samples': 39, 'gb_n_estimators': 110, 'gb_learning_rate': 0.043417814804746074, 'gb_max_depth': 3, 'gb_min_samples_split': 8, 'gb_min_samples_leaf': 14, 'cat_learning_rate': 0.2166106092474898, 'cat_iterations': 370, 'cat_depth': 4, 'cat_l2_leaf_reg': 0.6245725984947657, 'cat_random_strength': 4.756975825951826, 'cat_bagging_temperature': 0.917268048542011, 'cat_border_count': 221, 'hgb_learning_rate': 0.04714335935970081, 'hgb_max_iter': 199, 'hgb_max_depth': 18, 'hgb_min_samples_leaf': 4, 'hgb_l2_regularization': 0.5736773592857051}. Best is trial 2 with value: 358544.1850521039.
  'learning_rate': trial.suggest_uniform('lgb_learning_ra

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001643 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1998
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 27
[LightGBM] [Info] Start training from score 556.153500






0:	learn: 654.5663297	total: 41.1ms	remaining: 24.3s
1:	learn: 628.7959578	total: 84.9ms	remaining: 25s
2:	learn: 619.4142352	total: 120ms	remaining: 23.6s
3:	learn: 610.9852072	total: 181ms	remaining: 26.6s
4:	learn: 607.3721470	total: 240ms	remaining: 28.2s
5:	learn: 603.9358312	total: 308ms	remaining: 30s
6:	learn: 600.7633448	total: 375ms	remaining: 31.3s
7:	learn: 596.4296790	total: 464ms	remaining: 33.8s
8:	learn: 593.1955764	total: 549ms	remaining: 35.5s
9:	learn: 590.8968507	total: 637ms	remaining: 37.1s
10:	learn: 589.2456611	total: 726ms	remaining: 38.3s
11:	learn: 588.3494949	total: 804ms	remaining: 38.8s
12:	learn: 587.1919194	total: 873ms	remaining: 38.9s
13:	learn: 586.2096933	total: 926ms	remaining: 38.3s
14:	learn: 585.2448928	total: 985ms	remaining: 37.9s
15:	learn: 584.5617699	total: 1.04s	remaining: 37.6s
16:	learn: 583.9501709	total: 1.11s	remaining: 37.5s
17:	learn: 581.8941669	total: 1.19s	remaining: 38s
18:	learn: 579.8642444	total: 1.27s	remaining: 38.4s
19:	lea

44:	learn: 559.2172573	total: 2.94s	remaining: 35.7s
45:	learn: 558.8002034	total: 3.01s	remaining: 35.7s
46:	learn: 558.4110625	total: 3.07s	remaining: 35.6s
47:	learn: 557.5798276	total: 3.12s	remaining: 35.4s
48:	learn: 556.8153290	total: 3.17s	remaining: 35.2s
49:	learn: 556.6678366	total: 3.24s	remaining: 35.1s
50:	learn: 554.8902114	total: 3.32s	remaining: 35.2s
51:	learn: 554.6574989	total: 3.38s	remaining: 35s
52:	learn: 554.4474455	total: 3.42s	remaining: 34.8s
53:	learn: 554.2540384	total: 3.48s	remaining: 34.7s
54:	learn: 553.8489780	total: 3.55s	remaining: 34.6s
55:	learn: 552.3140949	total: 3.6s	remaining: 34.5s
56:	learn: 551.3633832	total: 3.67s	remaining: 34.5s
57:	learn: 550.8039888	total: 3.74s	remaining: 34.5s
58:	learn: 549.2635416	total: 3.8s	remaining: 34.4s
59:	learn: 549.0991582	total: 3.89s	remaining: 34.5s
60:	learn: 548.3707649	total: 3.95s	remaining: 34.4s
61:	learn: 547.5722843	total: 4s	remaining: 34.2s
62:	learn: 547.4987581	total: 4.06s	remaining: 34.1s


198:	learn: 490.8525380	total: 13.7s	remaining: 27s
199:	learn: 490.5178006	total: 13.8s	remaining: 27s
200:	learn: 490.3760608	total: 13.8s	remaining: 26.9s
201:	learn: 489.6931294	total: 13.9s	remaining: 26.9s
202:	learn: 489.4075158	total: 14s	remaining: 26.8s
203:	learn: 489.2058552	total: 14.1s	remaining: 26.8s
204:	learn: 489.0377024	total: 14.2s	remaining: 26.7s
205:	learn: 488.8775508	total: 14.2s	remaining: 26.7s
206:	learn: 488.6773010	total: 14.3s	remaining: 26.6s
207:	learn: 488.4578110	total: 14.4s	remaining: 26.6s
208:	learn: 488.0694278	total: 14.4s	remaining: 26.5s
209:	learn: 487.6990532	total: 14.5s	remaining: 26.4s
210:	learn: 487.5552463	total: 14.6s	remaining: 26.3s
211:	learn: 487.3843603	total: 14.6s	remaining: 26.2s
212:	learn: 486.9951053	total: 14.7s	remaining: 26.2s
213:	learn: 486.6223566	total: 14.8s	remaining: 26.1s
214:	learn: 486.3546936	total: 14.8s	remaining: 26s
215:	learn: 485.9955020	total: 14.9s	remaining: 25.9s
216:	learn: 485.6170952	total: 14.9s

351:	learn: 448.1352725	total: 24.7s	remaining: 16.8s
352:	learn: 448.1000096	total: 24.8s	remaining: 16.8s
353:	learn: 448.0405356	total: 24.8s	remaining: 16.7s
354:	learn: 447.6357820	total: 24.9s	remaining: 16.6s
355:	learn: 447.3564470	total: 25s	remaining: 16.5s
356:	learn: 447.2528050	total: 25.1s	remaining: 16.5s
357:	learn: 447.1509988	total: 25.1s	remaining: 16.4s
358:	learn: 447.0878050	total: 25.2s	remaining: 16.4s
359:	learn: 447.0273929	total: 25.3s	remaining: 16.3s
360:	learn: 446.8856681	total: 25.4s	remaining: 16.2s
361:	learn: 446.3355445	total: 25.4s	remaining: 16.1s
362:	learn: 446.1272449	total: 25.5s	remaining: 16.1s
363:	learn: 445.9316404	total: 25.6s	remaining: 16s
364:	learn: 445.5261235	total: 25.6s	remaining: 15.9s
365:	learn: 445.3676233	total: 25.7s	remaining: 15.9s
366:	learn: 445.3159295	total: 25.8s	remaining: 15.8s
367:	learn: 445.1780440	total: 25.9s	remaining: 15.8s
368:	learn: 444.6553104	total: 25.9s	remaining: 15.7s
369:	learn: 444.5591279	total: 2

506:	learn: 413.4943806	total: 35.8s	remaining: 6s
507:	learn: 412.8731313	total: 35.8s	remaining: 5.93s
508:	learn: 412.5995791	total: 35.9s	remaining: 5.86s
509:	learn: 412.3877641	total: 36s	remaining: 5.79s
510:	learn: 411.9927650	total: 36.1s	remaining: 5.72s
511:	learn: 411.7029794	total: 36.2s	remaining: 5.65s
512:	learn: 411.5301708	total: 36.2s	remaining: 5.58s
513:	learn: 411.3756008	total: 36.3s	remaining: 5.51s
514:	learn: 411.3165415	total: 36.4s	remaining: 5.44s
515:	learn: 411.3065466	total: 36.4s	remaining: 5.37s
516:	learn: 411.2398780	total: 36.5s	remaining: 5.3s
517:	learn: 411.0857420	total: 36.6s	remaining: 5.23s
518:	learn: 410.9451936	total: 36.7s	remaining: 5.16s
519:	learn: 410.8928139	total: 36.7s	remaining: 5.09s
520:	learn: 410.5921577	total: 36.8s	remaining: 5.02s
521:	learn: 410.1739580	total: 36.9s	remaining: 4.95s
522:	learn: 410.0888268	total: 37s	remaining: 4.88s
523:	learn: 410.0831782	total: 37s	remaining: 4.81s
524:	learn: 409.6934312	total: 37.1s	r

[I 2024-04-05 23:30:54,341] Trial 6 finished with value: 376806.2725492668 and parameters: {'lgb_num_leaves': 764, 'lgb_learning_rate': 0.03439662408478959, 'lgb_n_estimators': 430, 'lgb_subsample': 0.651773755760271, 'lgb_colsample_bytree': 0.6036600383043602, 'lgb_reg_alpha': 0.3598192421895884, 'lgb_reg_lambda': 0.005773488059482967, 'lgb_min_child_samples': 18, 'gb_n_estimators': 533, 'gb_learning_rate': 0.023497283704200005, 'gb_max_depth': 14, 'gb_min_samples_split': 3, 'gb_min_samples_leaf': 6, 'cat_learning_rate': 0.3351518760610676, 'cat_iterations': 592, 'cat_depth': 7, 'cat_l2_leaf_reg': 2.6595325159316583, 'cat_random_strength': 0.9768474024148481, 'cat_bagging_temperature': 0.015101814190600527, 'cat_border_count': 11, 'hgb_learning_rate': 0.4300540049585931, 'hgb_max_iter': 867, 'hgb_max_depth': 13, 'hgb_min_samples_leaf': 20, 'hgb_l2_regularization': 0.12913714184064748}. Best is trial 2 with value: 358544.1850521039.
  'learning_rate': trial.suggest_uniform('lgb_learnin

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003541 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1998
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 27
[LightGBM] [Info] Start training from score 556.153500
0:	learn: 663.8773340	total: 36.4ms	remaining: 8.62s
1:	learn: 645.9788057	total: 74ms	remaining: 8.73s
2:	learn: 635.7556340	total: 100ms	remaining: 7.86s
3:	learn: 625.5421936	total: 167ms	remaining: 9.78s
4:	learn: 619.8862576	total: 225ms	remaining: 10.5s
5:	learn: 616.4997523	total: 278ms	remaining: 10.8s
6:	learn: 614.0031398	total: 350ms	remaining: 11.6s
7:	learn: 611.5560475	total: 403ms	remaining: 11.6s
8:	learn: 611.1419386	total: 452ms	remaining: 11.5s
9:	learn: 610.2234427	total: 508ms	remaining: 11.6s
10:	learn: 609.1756898	total: 560ms	remaining: 11.6s
11:	learn: 608.4658871	total: 588ms	remaining: 11.1s
12:	learn: 608.1557927	total: 

101:	learn: 520.1109438	total: 6.84s	remaining: 9.12s
102:	learn: 519.6307745	total: 6.89s	remaining: 9.03s
103:	learn: 518.5970613	total: 6.95s	remaining: 8.96s
104:	learn: 518.3806113	total: 7.03s	remaining: 8.9s
105:	learn: 517.0741964	total: 7.11s	remaining: 8.85s
106:	learn: 516.6420604	total: 7.2s	remaining: 8.81s
107:	learn: 516.3843738	total: 7.29s	remaining: 8.78s
108:	learn: 516.0464821	total: 7.38s	remaining: 8.73s
109:	learn: 515.5176537	total: 7.46s	remaining: 8.68s
110:	learn: 515.0662194	total: 7.55s	remaining: 8.64s
111:	learn: 514.7017860	total: 7.6s	remaining: 8.55s
112:	learn: 513.8695648	total: 7.66s	remaining: 8.48s
113:	learn: 513.0478718	total: 7.75s	remaining: 8.43s
114:	learn: 512.7702872	total: 7.83s	remaining: 8.37s
115:	learn: 512.1985075	total: 7.91s	remaining: 8.32s
116:	learn: 511.5033025	total: 8.01s	remaining: 8.29s
117:	learn: 510.8561393	total: 8.08s	remaining: 8.21s
118:	learn: 510.3131621	total: 8.14s	remaining: 8.14s
119:	learn: 509.4406066	total: 

[I 2024-04-05 23:33:12,124] Trial 7 finished with value: 368772.56723685784 and parameters: {'lgb_num_leaves': 168, 'lgb_learning_rate': 0.06932276988734817, 'lgb_n_estimators': 77, 'lgb_subsample': 0.9311452640250495, 'lgb_colsample_bytree': 0.6784838295368586, 'lgb_reg_alpha': 0.06064112931694887, 'lgb_reg_lambda': 0.8389072190848819, 'lgb_min_child_samples': 58, 'gb_n_estimators': 911, 'gb_learning_rate': 0.03412747381118909, 'gb_max_depth': 13, 'gb_min_samples_split': 17, 'gb_min_samples_leaf': 9, 'cat_learning_rate': 0.3510873285439152, 'cat_iterations': 238, 'cat_depth': 7, 'cat_l2_leaf_reg': 0.9980265029085869, 'cat_random_strength': 6.710775219509435, 'cat_bagging_temperature': 0.6177450346025499, 'cat_border_count': 130, 'hgb_learning_rate': 0.494032743957499, 'hgb_max_iter': 902, 'hgb_max_depth': 10, 'hgb_min_samples_leaf': 11, 'hgb_l2_regularization': 0.7863452987474675}. Best is trial 2 with value: 358544.1850521039.
  'learning_rate': trial.suggest_uniform('lgb_learning_ra

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000924 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1998
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 27
[LightGBM] [Info] Start training from score 556.153500




0:	learn: 660.0186516	total: 53.2ms	remaining: 11.2s
1:	learn: 637.9269530	total: 101ms	remaining: 10.5s
2:	learn: 626.7753553	total: 162ms	remaining: 11.3s
3:	learn: 619.0676968	total: 239ms	remaining: 12.4s
4:	learn: 616.2434052	total: 321ms	remaining: 13.2s
5:	learn: 612.4215586	total: 395ms	remaining: 13.5s
6:	learn: 611.1145996	total: 446ms	remaining: 13s
7:	learn: 608.9739987	total: 508ms	remaining: 12.9s
8:	learn: 606.7843047	total: 573ms	remaining: 12.9s
9:	learn: 606.2864489	total: 657ms	remaining: 13.2s
10:	learn: 604.1989724	total: 727ms	remaining: 13.2s
11:	learn: 603.4270454	total: 784ms	remaining: 13s
12:	learn: 602.4983350	total: 843ms	remaining: 12.8s
13:	learn: 601.4390801	total: 890ms	remaining: 12.5s
14:	learn: 599.7109204	total: 944ms	remaining: 12.3s
15:	learn: 596.9866646	total: 1s	remaining: 12.3s
16:	learn: 596.4929530	total: 1.05s	remaining: 12s
17:	learn: 595.8742394	total: 1.12s	remaining: 12s
18:	learn: 593.7842982	total: 1.17s	remaining: 11.8s
19:	learn: 59

94:	learn: 514.7416622	total: 6.45s	remaining: 7.87s
95:	learn: 514.5767771	total: 6.54s	remaining: 7.83s
96:	learn: 514.2500376	total: 6.63s	remaining: 7.79s
97:	learn: 513.4684339	total: 6.71s	remaining: 7.74s
98:	learn: 513.0741675	total: 6.79s	remaining: 7.69s
99:	learn: 512.8891046	total: 6.88s	remaining: 7.64s
100:	learn: 512.6468792	total: 6.96s	remaining: 7.58s
101:	learn: 512.0531680	total: 7.04s	remaining: 7.52s
102:	learn: 511.8190582	total: 7.1s	remaining: 7.44s
103:	learn: 510.8351242	total: 7.16s	remaining: 7.37s
104:	learn: 510.5780300	total: 7.23s	remaining: 7.3s
105:	learn: 510.3107577	total: 7.3s	remaining: 7.23s
106:	learn: 509.8421399	total: 7.39s	remaining: 7.18s
107:	learn: 508.6843239	total: 7.47s	remaining: 7.12s
108:	learn: 508.1361974	total: 7.54s	remaining: 7.05s
109:	learn: 507.6072507	total: 7.63s	remaining: 7s
110:	learn: 507.1769555	total: 7.7s	remaining: 6.94s
111:	learn: 506.9751748	total: 7.77s	remaining: 6.87s
112:	learn: 506.8546157	total: 7.82s	rema

[I 2024-04-05 23:35:27,904] Trial 8 finished with value: 382308.428293381 and parameters: {'lgb_num_leaves': 725, 'lgb_learning_rate': 0.03638967407696524, 'lgb_n_estimators': 284, 'lgb_subsample': 0.7820512222381576, 'lgb_colsample_bytree': 0.9410743281181428, 'lgb_reg_alpha': 0.5072296914868429, 'lgb_reg_lambda': 0.7960013237534502, 'lgb_min_child_samples': 78, 'gb_n_estimators': 937, 'gb_learning_rate': 0.09710402499628412, 'gb_max_depth': 12, 'gb_min_samples_split': 20, 'gb_min_samples_leaf': 5, 'cat_learning_rate': 0.328837433027909, 'cat_iterations': 211, 'cat_depth': 7, 'cat_l2_leaf_reg': 0.6279408568646072, 'cat_random_strength': 3.540640371303645, 'cat_bagging_temperature': 0.7507208620872836, 'cat_border_count': 42, 'hgb_learning_rate': 0.4590408731999856, 'hgb_max_iter': 295, 'hgb_max_depth': 11, 'hgb_min_samples_leaf': 17, 'hgb_l2_regularization': 0.7843438893226962}. Best is trial 2 with value: 358544.1850521039.
  'learning_rate': trial.suggest_uniform('lgb_learning_rate'

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000742 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1998
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 27
[LightGBM] [Info] Start training from score 556.153500
0:	learn: 667.0535642	total: 50.4ms	remaining: 10.6s
1:	learn: 644.5166621	total: 90.1ms	remaining: 9.41s
2:	learn: 632.5422129	total: 120ms	remaining: 8.35s
3:	learn: 626.8562207	total: 166ms	remaining: 8.61s
4:	learn: 624.9506961	total: 208ms	remaining: 8.58s
5:	learn: 622.8794272	total: 255ms	remaining: 8.71s
6:	learn: 621.0922895	total: 293ms	remaining: 8.55s
7:	learn: 619.9730634	total: 320ms	remaining: 8.13s
8:	learn: 617.8726857	total: 357ms	remaining: 8.01s
9:	learn: 616.5681721	total: 396ms	remaining: 7.95s
10:	learn: 615.2569870	total: 438ms	remaining: 7.96s
11:	learn: 614.99

150:	learn: 552.6358628	total: 6.74s	remaining: 2.68s
151:	learn: 552.4550949	total: 6.78s	remaining: 2.63s
152:	learn: 551.6639428	total: 6.83s	remaining: 2.59s
153:	learn: 551.6364019	total: 6.88s	remaining: 2.54s
154:	learn: 551.1916583	total: 6.93s	remaining: 2.5s
155:	learn: 551.0451058	total: 6.98s	remaining: 2.46s
156:	learn: 550.9346866	total: 7.04s	remaining: 2.42s
157:	learn: 550.7890530	total: 7.08s	remaining: 2.38s
158:	learn: 550.5271875	total: 7.12s	remaining: 2.33s
159:	learn: 550.1232713	total: 7.15s	remaining: 2.28s
160:	learn: 550.0151503	total: 7.18s	remaining: 2.23s
161:	learn: 549.8806966	total: 7.21s	remaining: 2.18s
162:	learn: 549.3738593	total: 7.26s	remaining: 2.14s
163:	learn: 549.2359061	total: 7.33s	remaining: 2.1s
164:	learn: 549.1555886	total: 7.38s	remaining: 2.06s
165:	learn: 549.0681896	total: 7.42s	remaining: 2.01s
166:	learn: 548.9356049	total: 7.46s	remaining: 1.97s
167:	learn: 548.7605598	total: 7.53s	remaining: 1.93s
168:	learn: 548.6779566	total:

[I 2024-04-05 23:36:03,854] Trial 9 finished with value: 363390.51343181945 and parameters: {'lgb_num_leaves': 45, 'lgb_learning_rate': 0.09941602847139099, 'lgb_n_estimators': 293, 'lgb_subsample': 0.7434763656708885, 'lgb_colsample_bytree': 0.6726979818053713, 'lgb_reg_alpha': 0.5885734782419166, 'lgb_reg_lambda': 0.03335004548131648, 'lgb_min_child_samples': 65, 'gb_n_estimators': 614, 'gb_learning_rate': 0.07516875407037864, 'gb_max_depth': 4, 'gb_min_samples_split': 4, 'gb_min_samples_leaf': 16, 'cat_learning_rate': 0.3160077743558829, 'cat_iterations': 211, 'cat_depth': 5, 'cat_l2_leaf_reg': 2.839962738833861, 'cat_random_strength': 5.437841597238916, 'cat_bagging_temperature': 0.6030100855301798, 'cat_border_count': 164, 'hgb_learning_rate': 0.39600619705098267, 'hgb_max_iter': 716, 'hgb_max_depth': 10, 'hgb_min_samples_leaf': 4, 'hgb_l2_regularization': 0.6632935476799618}. Best is trial 2 with value: 358544.1850521039.


In [95]:
best_params

{'lgb_num_leaves': 80,
 'lgb_learning_rate': 0.08856391656202889,
 'lgb_n_estimators': 138,
 'lgb_subsample': 0.7938571236989019,
 'lgb_colsample_bytree': 0.9016202740584827,
 'lgb_reg_alpha': 0.96095800250082,
 'lgb_reg_lambda': 0.5220401485447594,
 'lgb_min_child_samples': 21,
 'gb_n_estimators': 987,
 'gb_learning_rate': 0.06899495748113199,
 'gb_max_depth': 3,
 'gb_min_samples_split': 12,
 'gb_min_samples_leaf': 3,
 'cat_learning_rate': 0.2806971986577452,
 'cat_iterations': 277,
 'cat_depth': 4,
 'cat_l2_leaf_reg': 9.706635385160684,
 'cat_random_strength': 7.351833081184607,
 'cat_bagging_temperature': 0.8959051902105583,
 'cat_border_count': 20,
 'hgb_learning_rate': 0.09000242826110565,
 'hgb_max_iter': 267,
 'hgb_max_depth': 7,
 'hgb_min_samples_leaf': 8,
 'hgb_l2_regularization': 0.9394399140092757}

In [107]:
y_train.shape

(20000,)

In [10]:
import optuna
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import LinearRegression
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor, HistGradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# 하이퍼파라미터
lgb_params = {
    'num_leaves': 80,
    'learning_rate': 0.08856391656202889,
    'n_estimators': 138,
    'subsample': 0.7938571236989019,
    'colsample_bytree': 0.9016202740584827,
    'reg_alpha': 0.96095800250082,
    'reg_lambda': 0.5220401485447594,
    'min_child_samples': 21
}

xgb_params = {
    'n_estimators': 987,
    'learning_rate': 0.06899495748113199,
    'max_depth': 3,
    'min_samples_split': 12,
    'min_samples_leaf': 3
}

catboost_params = {
    'learning_rate': 0.2806971986577452,
    'iterations': 277,
    'depth': 4,
    'l2_leaf_reg': 9.706635385160684,
    'random_strength': 7.351833081184607,
    'bagging_temperature': 0.8959051902105583,
    'border_count': 20
}

hgb_params = {
    'learning_rate': 0.09000242826110565,
    'max_iter': 267,
    'max_depth': 7,
    'min_samples_leaf': 8,
    'l2_regularization': 0.9394399140092757
}

# 모델 인스턴스 생성
lgb_model = LGBMRegressor(**lgb_params)
xgb_model = XGBRegressor(**xgb_params)
catboost_model = CatBoostRegressor(**catboost_params, verbose=False)
gb_model = GradientBoostingRegressor()
hgb_model = HistGradientBoostingRegressor()

# StackingRegressor 정의
estimators = [('lgb', lgb_model), ('xgb', xgb_model), ('catboost', catboost_model), ('gb', gb_model), ('hgb', hgb_model)]
stacking_regressor = StackingRegressor(
    estimators=estimators,
    final_estimator=LinearRegression()
)

# StackingRegressor 학습
stacking_regressor.fit(train_df, y_train)

# 테스트 데이터에 대한 예측 수행
predictions = stacking_regressor.predict(test_df)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004672 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2134
[LightGBM] [Info] Number of data points in the train set: 20000, number of used features: 27
[LightGBM] [Info] Start training from score 554.565250


Parameters: { "min_samples_leaf", "min_samples_split" } are not used.



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003015 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2002
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 27
[LightGBM] [Info] Start training from score 554.683937
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002968 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1995
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 27
[LightGBM] [Info] Start training from score 550.044437
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003179 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not e

Parameters: { "min_samples_leaf", "min_samples_split" } are not used.

Parameters: { "min_samples_leaf", "min_samples_split" } are not used.

Parameters: { "min_samples_leaf", "min_samples_split" } are not used.

Parameters: { "min_samples_leaf", "min_samples_split" } are not used.

Parameters: { "min_samples_leaf", "min_samples_split" } are not used.



In [109]:
# 테스트 데이터에 대한 예측
predictions = voting_regressor.predict(test_df)

# 예측 결과 출력 또는 활용
print(predictions)

[ -6.54062976 -13.25861202 397.7082623  ... 410.23424638   3.90529202
 719.82497623]


In [11]:
predictions

array([ -4.81647889,   1.03780262, 415.47446541, ..., 376.48426799,
        -4.30677487, 664.50258389])

In [12]:
# 예측값이 0보다 작으면 0으로 처리
predictions = np.maximum(predictions, 0)

# 처리된 예측값 출력 또는 활용
print(predictions)

[  0.           1.03780262 415.47446541 ... 376.48426799   0.
 664.50258389]


In [13]:
submit = pd.read_csv("sample_submission.csv")

In [14]:
submit['Income'] =predictions
submit

Unnamed: 0,ID,Income
0,TEST_0000,0.000000
1,TEST_0001,1.037803
2,TEST_0002,415.474465
3,TEST_0003,627.798290
4,TEST_0004,0.000000
...,...,...
9995,TEST_9995,864.289624
9996,TEST_9996,807.790938
9997,TEST_9997,376.484268
9998,TEST_9998,0.000000


In [15]:
submit.to_csv('stacking1.csv', index=False)