In [1]:
!pip install optuna
!pip install catboost

Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.8-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.1.0-py3-none-any.whl (364 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m364.4/364.4 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.0-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.8-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Ma

In [2]:
import os
import numpy as np
import pandas as pd
import optuna
from optuna.samplers import TPESampler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
import xgboost as xgb

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [3]:
from google.colab import drive
drive.mount('/content/drive')

# 데이터 로드
train_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/데이터/train_processed2.csv')
submit_test_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/데이터/test_processed2.csv')

Mounted at /content/drive


# 데이터 분할

In [4]:
# Target 및 Train/Test Split
target_df = train_df['가격(백만원)']
train_df = train_df.drop(columns=['가격(백만원)'])
train_df, test_df, target_df, test_target_df = train_test_split(
    train_df, target_df, test_size=0.2, random_state=42
)

# 모델 함수 정의

In [5]:
# Optuna Objective 함수 정의
def objective_lightgbm(trial, train_df, target_df, test_df, test_target_df):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 30),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 10, 100),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
    }
    model = LGBMRegressor(**params, random_state=42)
    model.fit(train_df, target_df)
    predictions = model.predict(test_df)
    return np.sqrt(mean_squared_error(test_target_df, predictions))

def objective_catboost(trial, train_df, target_df, test_df, test_target_df):
    params = {
        'iterations': trial.suggest_int('iterations', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        'depth': trial.suggest_int('depth', 4, 15),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-8, 10.0, log=True),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
    }
    model = CatBoostRegressor(**params, verbose=False, random_state=42)
    model.fit(train_df, target_df)
    predictions = model.predict(test_df)
    return np.sqrt(mean_squared_error(test_target_df, predictions))

def objective_xgb(trial, train_df, target_df, test_df, test_target_df):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 30),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 7),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
    }
    model = xgb.XGBRegressor(objective='reg:squarederror', **params)
    model.fit(train_df, target_df)
    predictions = model.predict(test_df)
    return np.sqrt(mean_squared_error(test_target_df, predictions))

# 추가 모델 최적화를 위한 함수 (Bagging, RandomForest, ExtraTrees, GradientBoosting, DecisionTree)
def objective_sklearn(trial, model_class, train_df, target_df, test_df, test_target_df):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000) if 'n_estimators' in model_class().get_params() else None,
        'max_depth': trial.suggest_int('max_depth', 3, 30),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
    }
    params = {k: v for k, v in params.items() if v is not None}
    model = model_class(**params, random_state=42)
    model.fit(train_df, target_df)
    predictions = model.predict(test_df)
    return np.sqrt(mean_squared_error(test_target_df, predictions))

# Optuna 최적화 함수

In [6]:
# Optuna 최적화 함수
def optimize_model(model_name, objective_func, train_df, target_df, test_df, test_target_df):
    db_dir = "/content/optuna"
    os.makedirs(db_dir, exist_ok=True)
    storage_name = f"sqlite:///{db_dir}/optuna_{model_name}.db"

    try:
        study = optuna.create_study(
            storage=storage_name,
            study_name=model_name,
            direction='minimize',
            sampler=TPESampler(multivariate=True, seed=42),
        )
    except:
        study = optuna.load_study(study_name=model_name, storage=storage_name)

    study.optimize(
        lambda trial: objective_func(trial, train_df, target_df, test_df, test_target_df),
        n_trials=50,
    )
    return study

In [7]:
# 모델 최적화 및 예측
models = {
    'LightGBM': (objective_lightgbm, LGBMRegressor),
    'CatBoost': (objective_catboost, CatBoostRegressor),
    'XGBoost': (objective_xgb, xgb.XGBRegressor),
    'RandomForest': (objective_sklearn, RandomForestRegressor),
    'ExtraTrees': (objective_sklearn, ExtraTreesRegressor),
    'DecisionTree': (objective_sklearn, DecisionTreeRegressor),
    'GradientBoosting': (objective_sklearn, GradientBoostingRegressor),
}

In [None]:
import functools

# 모델 최적화 및 예측
studies = {}
predictions = {}

print("=== 모델 최적화 시작 ===")
for model_name, (objective_func, model_class) in models.items():
    print(f"Optimizing {model_name}...")

    if model_name in ['Bagging', 'RandomForest', 'ExtraTrees', 'DecisionTree', 'GradientBoosting']:
        # objective_sklearn을 사용하는 경우
        study_objective = functools.partial(
            objective_sklearn,
            model_class=model_class,
            train_df=train_df,
            target_df=target_df,
            test_df=test_df,
            test_target_df=test_target_df,
        )
    else:
        # LightGBM, CatBoost, XGBoost 등 다른 모델
        study_objective = functools.partial(
            objective_func,
            train_df=train_df,
            target_df=target_df,
            test_df=test_df,
            test_target_df=test_target_df,
        )

    # 모델 최적화
    studies[model_name] = optuna.create_study(
        study_name=model_name,
        direction='minimize',
        sampler=TPESampler(multivariate=True, seed=42),
    )
    studies[model_name].optimize(study_objective, n_trials=50)

    # 최적화된 모델로 예측 수행
    best_model = model_class(**studies[model_name].best_params, random_state=42)
    best_model.fit(train_df, target_df)
    predictions[model_name] = best_model.predict(test_df)

# 단순 평균 앙상블
ensemble_mean = sum(predictions.values()) / len(predictions)
rmse_mean = np.sqrt(mean_squared_error(test_target_df, ensemble_mean))
r2_mean = r2_score(test_target_df, ensemble_mean)

# 가중치 기반 앙상블
weights = [1 / studies[model].best_value for model in predictions.keys()]
weights = np.array(weights) / sum(weights)
ensemble_weighted = sum(weights[i] * list(predictions.values())[i] for i in range(len(weights)))
rmse_weighted = np.sqrt(mean_squared_error(test_target_df, ensemble_weighted))
r2_weighted = r2_score(test_target_df, ensemble_weighted)

# 결과 정리
results = {
    'Model': list(predictions.keys()) + ['Ensemble(Mean)', 'Ensemble(Weighted)'],
    'RMSE': [
        np.sqrt(mean_squared_error(test_target_df, predictions[model]))
        for model in predictions.keys()
    ] + [rmse_mean, rmse_weighted],
    'R2': [
        r2_score(test_target_df, predictions[model])
        for model in predictions.keys()
    ] + [r2_mean, r2_weighted],
}

results_df = pd.DataFrame(results)
print("\n=== 모델 성능 비교 ===")
print(results_df.round(4))


[I 2025-01-11 12:23:59,680] A new study created in memory with name: LightGBM


=== 모델 최적화 시작 ===
Optimizing LightGBM...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001316 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:00,767] Trial 0 finished with value: 1.4640699050951034 and parameters: {'n_estimators': 406, 'max_depth': 29, 'learning_rate': 0.029106359131330698, 'num_leaves': 64, 'subsample': 0.6624074561769746, 'colsample_bytree': 0.662397808134481}. Best is trial 0 with value: 1.4640699050951034.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000896 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:01,175] Trial 1 finished with value: 7.068737037916933 and parameters: {'n_estimators': 105, 'max_depth': 27, 'learning_rate': 0.015930522616241012, 'num_leaves': 74, 'subsample': 0.608233797718321, 'colsample_bytree': 0.9879639408647978}. Best is trial 0 with value: 1.4640699050951034.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001210 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:03,715] Trial 2 finished with value: 5.811184762184545 and parameters: {'n_estimators': 841, 'max_depth': 8, 'learning_rate': 0.0023102018878452934, 'num_leaves': 26, 'subsample': 0.7216968971838151, 'colsample_bytree': 0.8099025726528951}. Best is trial 0 with value: 1.4640699050951034.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001302 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:04,788] Trial 3 finished with value: 1.4188009151507626 and parameters: {'n_estimators': 460, 'max_depth': 11, 'learning_rate': 0.01673808578875214, 'num_leaves': 22, 'subsample': 0.7168578594140873, 'colsample_bytree': 0.7465447373174767}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002262 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:07,010] Trial 4 finished with value: 11.356839081209554 and parameters: {'n_estimators': 483, 'max_depth': 24, 'learning_rate': 0.002508115686045232, 'num_leaves': 56, 'subsample': 0.836965827544817, 'colsample_bytree': 0.6185801650879991}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001417 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:09,603] Trial 5 finished with value: 16.052365217971108 and parameters: {'n_estimators': 627, 'max_depth': 7, 'learning_rate': 0.0013492834268013251, 'num_leaves': 96, 'subsample': 0.9862528132298237, 'colsample_bytree': 0.9233589392465844}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000848 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:10,187] Trial 6 finished with value: 1.4842473709290458 and parameters: {'n_estimators': 339, 'max_depth': 5, 'learning_rate': 0.0233596350262616, 'num_leaves': 50, 'subsample': 0.6488152939379115, 'colsample_bytree': 0.798070764044508}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000926 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:10,446] Trial 7 finished with value: 28.334854127070425 and parameters: {'n_estimators': 82, 'max_depth': 28, 'learning_rate': 0.0032927591344236173, 'num_leaves': 70, 'subsample': 0.7246844304357644, 'colsample_bytree': 0.8080272084711243}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000885 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:11,574] Trial 8 finished with value: 1.5329638115220157 and parameters: {'n_estimators': 569, 'max_depth': 8, 'learning_rate': 0.08692991511139551, 'num_leaves': 80, 'subsample': 0.9757995766256756, 'colsample_bytree': 0.9579309401710595}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000834 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:12,633] Trial 9 finished with value: 14.93960434039101 and parameters: {'n_estimators': 618, 'max_depth': 28, 'learning_rate': 0.0015030900645056826, 'num_leaves': 27, 'subsample': 0.6180909155642152, 'colsample_bytree': 0.7301321323053057}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000822 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:13,066] Trial 10 finished with value: 5.4716673803144795 and parameters: {'n_estimators': 232, 'max_depth': 16, 'learning_rate': 0.008685230189567614, 'num_leaves': 26, 'subsample': 0.8143594328786764, 'colsample_bytree': 0.7477489748789169}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002309 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:14,918] Trial 11 finished with value: 1.5044984599593263 and parameters: {'n_estimators': 700, 'max_depth': 30, 'learning_rate': 0.02875479716035971, 'num_leaves': 75, 'subsample': 0.6810641783626817, 'colsample_bytree': 0.6011264004603101}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002187 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:15,410] Trial 12 finished with value: 1.4642029193548303 and parameters: {'n_estimators': 154, 'max_depth': 26, 'learning_rate': 0.06937056834856856, 'num_leaves': 87, 'subsample': 0.6165571272247296, 'colsample_bytree': 0.6051167755376659}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000835 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:16,128] Trial 13 finished with value: 1.439890888020304 and parameters: {'n_estimators': 298, 'max_depth': 30, 'learning_rate': 0.027641733585252536, 'num_leaves': 55, 'subsample': 0.696074767440041, 'colsample_bytree': 0.790799327063814}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000858 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:16,932] Trial 14 finished with value: 1.427924239183468 and parameters: {'n_estimators': 767, 'max_depth': 9, 'learning_rate': 0.038994948262025354, 'num_leaves': 15, 'subsample': 0.6573428614893667, 'colsample_bytree': 0.7343616976942932}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002317 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:17,798] Trial 15 finished with value: 1.4648595269354852 and parameters: {'n_estimators': 939, 'max_depth': 7, 'learning_rate': 0.07330740039512762, 'num_leaves': 11, 'subsample': 0.7017372887564708, 'colsample_bytree': 0.7864043300662426}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000864 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:18,568] Trial 16 finished with value: 1.5399872598578412 and parameters: {'n_estimators': 517, 'max_depth': 4, 'learning_rate': 0.022707741763328553, 'num_leaves': 18, 'subsample': 0.7313955300807439, 'colsample_bytree': 0.6497815637182882}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000842 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:19,217] Trial 17 finished with value: 1.4243705805406075 and parameters: {'n_estimators': 488, 'max_depth': 14, 'learning_rate': 0.07358635816077594, 'num_leaves': 11, 'subsample': 0.6355308758435183, 'colsample_bytree': 0.7893946820279283}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001336 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:20,050] Trial 18 finished with value: 1.4417164911778713 and parameters: {'n_estimators': 461, 'max_depth': 24, 'learning_rate': 0.07391003275033539, 'num_leaves': 22, 'subsample': 0.6607543567318205, 'colsample_bytree': 0.7058163784179765}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001508 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:21,170] Trial 19 finished with value: 1.4413010710328849 and parameters: {'n_estimators': 526, 'max_depth': 18, 'learning_rate': 0.04795275735568767, 'num_leaves': 26, 'subsample': 0.6341151103253511, 'colsample_bytree': 0.8726515253994049}. Best is trial 3 with value: 1.4188009151507626.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001316 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:21,573] Trial 20 finished with value: 1.4078593801347512 and parameters: {'n_estimators': 178, 'max_depth': 8, 'learning_rate': 0.07373662888814625, 'num_leaves': 18, 'subsample': 0.6211921657461162, 'colsample_bytree': 0.6516566354287385}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003415 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:22,296] Trial 21 finished with value: 1.4339392601650573 and parameters: {'n_estimators': 273, 'max_depth': 7, 'learning_rate': 0.05476692222595708, 'num_leaves': 34, 'subsample': 0.6171216940898576, 'colsample_bytree': 0.6335303036236999}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001195 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:22,606] Trial 22 finished with value: 1.5048279809558622 and parameters: {'n_estimators': 117, 'max_depth': 12, 'learning_rate': 0.07140065452794758, 'num_leaves': 15, 'subsample': 0.7244026230355648, 'colsample_bytree': 0.7068351927758977}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001348 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:23,238] Trial 23 finished with value: 1.4397248441010726 and parameters: {'n_estimators': 338, 'max_depth': 10, 'learning_rate': 0.0806172543206443, 'num_leaves': 16, 'subsample': 0.7127882597886986, 'colsample_bytree': 0.8321213703006768}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003533 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:23,642] Trial 24 finished with value: 8.421943934809894 and parameters: {'n_estimators': 130, 'max_depth': 10, 'learning_rate': 0.012039746736680299, 'num_leaves': 21, 'subsample': 0.6640851165539599, 'colsample_bytree': 0.6240172544040373}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002242 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:24,094] Trial 25 finished with value: 1.4244923507074103 and parameters: {'n_estimators': 410, 'max_depth': 12, 'learning_rate': 0.046338571029309286, 'num_leaves': 12, 'subsample': 0.6251945432543907, 'colsample_bytree': 0.7097526074796676}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002245 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:25,241] Trial 26 finished with value: 1.4186938259279454 and parameters: {'n_estimators': 637, 'max_depth': 15, 'learning_rate': 0.01826995884769093, 'num_leaves': 31, 'subsample': 0.7449699733046049, 'colsample_bytree': 0.7238743784226407}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000824 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:26,352] Trial 27 finished with value: 1.4438716484129226 and parameters: {'n_estimators': 569, 'max_depth': 15, 'learning_rate': 0.017104497069779682, 'num_leaves': 43, 'subsample': 0.7579262350440465, 'colsample_bytree': 0.716515910980612}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000834 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:27,044] Trial 28 finished with value: 1.5890603770632912 and parameters: {'n_estimators': 570, 'max_depth': 19, 'learning_rate': 0.014009438075601701, 'num_leaves': 14, 'subsample': 0.822564456021639, 'colsample_bytree': 0.6810775057419111}. Best is trial 20 with value: 1.4078593801347512.
[I 2025-01-11 12:24:27,214] Trial 29 finished with value: 2.866625608713725 and parameters: {'n_estimators': 131, 'max_depth': 4, 'learning_rate': 0.04194066459760547, 'num_leaves': 10, 'subsample': 0.705545121955966, 'colsample_bytree': 0.6003639789077128}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002219 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000879 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:27,778] Trial 30 finished with value: 2.8804012038499 and parameters: {'n_estimators': 442, 'max_depth': 7, 'learning_rate': 0.008393448174118932, 'num_leaves': 13, 'subsample': 0.8043695434513126, 'colsample_bytree': 0.8507257585199687}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000971 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:28,296] Trial 31 finished with value: 1.4523354922001466 and parameters: {'n_estimators': 542, 'max_depth': 10, 'learning_rate': 0.08040160833629705, 'num_leaves': 13, 'subsample': 0.647620094909413, 'colsample_bytree': 0.7874567478515141}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000876 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:29,907] Trial 32 finished with value: 1.436072915684763 and parameters: {'n_estimators': 701, 'max_depth': 19, 'learning_rate': 0.011491936118533854, 'num_leaves': 51, 'subsample': 0.7627839453237917, 'colsample_bytree': 0.898455323922507}. Best is trial 20 with value: 1.4078593801347512.
[I 2025-01-11 12:24:30,035] Trial 33 finished with value: 2.0746306812099236 and parameters: {'n_estimators': 62, 'max_depth': 5, 'learning_rate': 0.09536660786935426, 'num_leaves': 12, 'subsample': 0.6038964415038479, 'colsample_bytree': 0.7125739429208793}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000839 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001190 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:30,635] Trial 34 finished with value: 1.5021958441852425 and parameters: {'n_estimators': 552, 'max_depth': 14, 'learning_rate': 0.02012682331732156, 'num_leaves': 12, 'subsample': 0.6973187730717104, 'colsample_bytree': 0.8077057562436882}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002279 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:31,938] Trial 35 finished with value: 1.78019101509251 and parameters: {'n_estimators': 940, 'max_depth': 19, 'learning_rate': 0.006069831180816194, 'num_leaves': 15, 'subsample': 0.6770897230077353, 'colsample_bytree': 0.7349639297847599}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000830 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:33,234] Trial 36 finished with value: 1.59822330993279 and parameters: {'n_estimators': 731, 'max_depth': 10, 'learning_rate': 0.005917541961580408, 'num_leaves': 29, 'subsample': 0.7245031617786418, 'colsample_bytree': 0.711815807183886}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000850 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:36,987] Trial 37 finished with value: 1.548248038674579 and parameters: {'n_estimators': 800, 'max_depth': 15, 'learning_rate': 0.04720367665580551, 'num_leaves': 60, 'subsample': 0.6518825861393673, 'colsample_bytree': 0.6931599710652437}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006047 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:39,601] Trial 38 finished with value: 1.5029239324420873 and parameters: {'n_estimators': 846, 'max_depth': 12, 'learning_rate': 0.041488900220512395, 'num_leaves': 40, 'subsample': 0.8902080522029444, 'colsample_bytree': 0.6527227919145925}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000843 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:40,341] Trial 39 finished with value: 11.053267077161289 and parameters: {'n_estimators': 476, 'max_depth': 4, 'learning_rate': 0.0029493942724863344, 'num_leaves': 64, 'subsample': 0.8848081713336596, 'colsample_bytree': 0.7257358277204115}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002108 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:40,799] Trial 40 finished with value: 1.490139348229924 and parameters: {'n_estimators': 232, 'max_depth': 5, 'learning_rate': 0.03292316350952331, 'num_leaves': 45, 'subsample': 0.9898903030604024, 'colsample_bytree': 0.6811566860900125}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000968 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:41,214] Trial 41 finished with value: 1.4987798572177515 and parameters: {'n_estimators': 374, 'max_depth': 11, 'learning_rate': 0.037575808699301626, 'num_leaves': 10, 'subsample': 0.6430591409002271, 'colsample_bytree': 0.6958134809927548}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000834 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:41,867] Trial 42 finished with value: 4.716178502916702 and parameters: {'n_estimators': 379, 'max_depth': 8, 'learning_rate': 0.005785138355471643, 'num_leaves': 26, 'subsample': 0.6371842871868334, 'colsample_bytree': 0.7383543239931457}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000861 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:42,268] Trial 43 finished with value: 1.4149954986690958 and parameters: {'n_estimators': 232, 'max_depth': 19, 'learning_rate': 0.04234819606456933, 'num_leaves': 23, 'subsample': 0.8603387651050569, 'colsample_bytree': 0.969476601893713}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000895 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:42,658] Trial 44 finished with value: 1.4311481660502876 and parameters: {'n_estimators': 310, 'max_depth': 16, 'learning_rate': 0.06544521126902497, 'num_leaves': 16, 'subsample': 0.8443662283848451, 'colsample_bytree': 0.9889528033457071}. Best is trial 20 with value: 1.4078593801347512.
[I 2025-01-11 12:24:42,856] Trial 45 finished with value: 3.7019596165380784 and parameters: {'n_estimators': 95, 'max_depth': 25, 'learning_rate': 0.028204804864628587, 'num_leaves': 17, 'subsample': 0.7445614780066886, 'colsample_bytree': 0.9686495097149288}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000880 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000866 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:43,030] Trial 46 finished with value: 14.798667527152555 and parameters: {'n_estimators': 70, 'max_depth': 22, 'learning_rate': 0.013371025669610778, 'num_leaves': 21, 'subsample': 0.8842385937037798, 'colsample_bytree': 0.9055019437886054}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002097 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:45,283] Trial 47 finished with value: 1.507628174283715 and parameters: {'n_estimators': 725, 'max_depth': 13, 'learning_rate': 0.025880837105158486, 'num_leaves': 97, 'subsample': 0.9533488556444156, 'colsample_bytree': 0.6893600244375504}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001000 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:46,049] Trial 48 finished with value: 2.7526814536943625 and parameters: {'n_estimators': 342, 'max_depth': 11, 'learning_rate': 0.008203831184836164, 'num_leaves': 38, 'subsample': 0.937176137367587, 'colsample_bytree': 0.9895784051996989}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000873 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:46,769] Trial 49 finished with value: 1.4538863914710751 and parameters: {'n_estimators': 388, 'max_depth': 25, 'learning_rate': 0.039682692274679424, 'num_leaves': 36, 'subsample': 0.8217255639612671, 'colsample_bytree': 0.9507013201542869}. Best is trial 20 with value: 1.4078593801347512.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002210 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1211
[LightGBM] [Info] Number of data points in the train set: 5997, number of used features: 27
[LightGBM] [Info] Start training from score 62.221487


[I 2025-01-11 12:24:47,013] A new study created in memory with name: CatBoost


Optimizing CatBoost...


[I 2025-01-11 12:25:15,126] Trial 0 finished with value: 1.6062922264477426 and parameters: {'iterations': 437, 'learning_rate': 0.07969454818643935, 'depth': 12, 'l2_leaf_reg': 0.0024430162614261413, 'bagging_temperature': 0.15601864044243652}. Best is trial 0 with value: 1.6062922264477426.
[I 2025-01-11 12:25:32,203] Trial 1 finished with value: 27.56475323413315 and parameters: {'iterations': 240, 'learning_rate': 0.0013066739238053278, 'depth': 14, 'l2_leaf_reg': 0.002570603566117598, 'bagging_temperature': 0.7080725777960455}. Best is trial 0 with value: 1.6062922264477426.
[I 2025-01-11 12:25:44,565] Trial 2 finished with value: 1.4429982894584386 and parameters: {'iterations': 118, 'learning_rate': 0.08706020878304858, 'depth': 13, 'l2_leaf_reg': 8.148018307012941e-07, 'bagging_temperature': 0.18182496720710062}. Best is trial 2 with value: 1.4429982894584386.
[I 2025-01-11 12:25:47,865] Trial 3 finished with value: 13.67031330834471 and parameters: {'iterations': 265, 'learnin

**Extratree 가 가장 성능이 높게 나타남**

### Extratree Model

In [None]:
# ExtraTrees 모델의 최적화된 파라미터 가져오기
extra_trees_params = studies['ExtraTrees'].best_params

# ExtraTrees 모델 생성 및 학습
extra_trees_model = ExtraTreesRegressor(**extra_trees_params, random_state=42)
extra_trees_model.fit(train_df, target_df)  # 학습 데이터를 사용하여 모델 학습

# 테스트 데이터 예측
test_predictions = extra_trees_model.predict(test_df)

# RMSE 계산
train_predictions = extra_trees_model.predict(train_df)  # 학습 데이터 예측
train_rmse = np.sqrt(mean_squared_error(target_df, train_predictions))  # 학습 데이터 RMSE
test_rmse = np.sqrt(mean_squared_error(test_target_df, test_predictions))  # 테스트 데이터 RMSE

# 결과 출력
print(f"ExtraTrees 모델 학습 데이터 RMSE: {train_rmse:.4f}")
print(f"ExtraTrees 모델 테스트 데이터 RMSE: {test_rmse:.4f}")

In [None]:
# 테스트 데이터 예측
test_predictions = extra_trees_model.predict(submit_test_df)

In [None]:
# sample_submission.csv 불러오기
sample_submission_path = '/content/drive/MyDrive/Colab Notebooks/데이터/sample_submission.csv'
sample_submission = pd.read_csv(sample_submission_path)

# y_pred 값을 sample_submission에 삽입
sample_submission['가격(백만원)'] = test_predictions

# 제출 파일 저장
output_path = '/content/drive/MyDrive/Colab Notebooks/데이터/ET+optuna submission.csv'
sample_submission.to_csv(output_path, index=False)

print(f"Submission file saved to {output_path}")

# 앙상블

In [None]:
# 각 모델의 예측 수행
weighted_predictions = np.zeros(len(submit_test_df))

# 모델별 가중치 계산
weights = [1 / studies[model].best_value for model in predictions.keys()]
weights = np.array(weights) / sum(weights)  # 가중치 정규화

# 각 모델로 submit_test_df 예측
for i, model_name in enumerate(predictions.keys()):
    # 최적화된 모델 가져오기
    best_model = models[model_name][1](**studies[model_name].best_params, random_state=42)
    best_model.fit(train_df, target_df)  # 모델 학습
    model_predictions = best_model.predict(submit_test_df)  # 테스트 데이터 예측

    # 가중치 적용 후 누적
    weighted_predictions += weights[i] * model_predictions

In [None]:
# sample_submission.csv 불러오기
sample_submission_path = '/content/drive/MyDrive/Colab Notebooks/데이터/sample_submission.csv'
sample_submission = pd.read_csv(sample_submission_path)

# y_pred 값을 sample_submission에 삽입
sample_submission['가격(백만원)'] = weighted_predictions

# 제출 파일 저장
output_path = '/content/drive/MyDrive/Colab Notebooks/데이터/Ensamble+optuna submission.csv'
sample_submission.to_csv(output_path, index=False)

print(f"Submission file saved to {output_path}")