# モデル

このノートブックでは、前処理済みデータを用いてランダムフォレストによるモデルを構築し、評価します。

- 目的変数: `price_actual`
- モデル: ランダムフォレスト（RandomForestRegressor）
- 評価指標: RMSE
- ハイパーパラメータチューニング: GridSearchCV


## 1. ライブラリのインポートとデータ読み込み

In [6]:
# ファイルの存在確認
!ls -a ../..
# アクティベート
!source ../../.venv/bin/activate

[34m.[m[m                   .DS_Store           .python-version     README.md
[34m..[m[m                  [34m.git[m[m                [34m.venv[m[m               [34msignate_smbc_202506[m[m
[34m.cursor[m[m             .gitignore          pyproject.toml      uv.lock


In [10]:
# LightGBM特有のエラー対策
#!brew install libomp
#!pip uninstall lightgbm
#!pip install lightgbm

To reinstall 20.1.7, run:
  brew reinstall libomp
[0mCollecting lightgbm
  Downloading lightgbm-4.6.0-py3-none-macosx_12_0_arm64.whl.metadata (17 kB)
Downloading lightgbm-4.6.0-py3-none-macosx_12_0_arm64.whl (1.6 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m:01[0m
[?25hInstalling collected packages: lightgbm
Successfully installed lightgbm-4.6.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [11]:
import pandas as pd
import numpy as np
from pathlib import Path
import lightgbm as lgb
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
import optuna

# データディレクトリ
PROJECT_ROOT = Path.cwd().parent
DATA_DIR = PROJECT_ROOT / 'data'
print(DATA_DIR)
# 前処理済みデータの読み込み
train = pd.read_csv(DATA_DIR / 'train_processed.csv')
test = pd.read_csv(DATA_DIR / 'test_processed.csv')

print('train shape:', train.shape)
print('test shape:', test.shape)

/Users/m0122wt/Desktop/02.プライベート/01.ノウハウ/07.データ分析/notebook/signate_smbc_202506/data
train shape: (26280, 122)
test shape: (8760, 121)


## 2. 特徴量・目的変数の設定

In [12]:
# 目的変数
target_col = 'price_actual'

# 説明変数（目的変数とtime列以外）
drop_cols = ['time', target_col] if target_col in train.columns else ['time']
feature_cols = [col for col in train.columns if col not in drop_cols]

X = train[feature_cols]
y = train[target_col] if target_col in train.columns else train.iloc[:, -1]  # 念のため

print('Features:', feature_cols)
print('Target:', target_col)
print('X shape:', X.shape)
print('y shape:', y.shape)

Features: ['generation_biomass', 'generation_fossil_brown_coal/lignite', 'generation_fossil_gas', 'generation_fossil_hard_coal', 'generation_fossil_oil', 'generation_hydro_pumped_storage_consumption', 'generation_hydro_run_of_river_and_poundage', 'generation_hydro_water_reservoir', 'generation_nuclear', 'generation_other', 'generation_other_renewable', 'generation_solar', 'generation_waste', 'generation_wind_onshore', 'total_load_actual', 'valencia_temp', 'valencia_temp_min', 'valencia_temp_max', 'valencia_pressure', 'valencia_humidity', 'valencia_wind_speed', 'valencia_wind_deg', 'valencia_rain_1h', 'valencia_rain_3h', 'valencia_snow_3h', 'valencia_clouds_all', 'valencia_weather_id', 'valencia_weather_main', 'valencia_weather_description', 'valencia_weather_icon', 'madrid_temp', 'madrid_temp_min', 'madrid_temp_max', 'madrid_pressure', 'madrid_humidity', 'madrid_wind_speed', 'madrid_wind_deg', 'madrid_rain_1h', 'madrid_rain_3h', 'madrid_snow_3h', 'madrid_clouds_all', 'madrid_weather_id

## 3. 学習・検証データ分割

In [13]:
# ベイズ最適化によるLightGBMハイパーパラメータ探索
tscv = TimeSeriesSplit(n_splits=5)
def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.6, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.6, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),
        'verbose': -1,
        'random_state': 42
    }
    rmses = []
    for train_idx, valid_idx in tscv.split(X):
        X_train, X_valid = X.iloc[train_idx], X.iloc[valid_idx]
        y_train, y_valid = y.iloc[train_idx], y.iloc[valid_idx]
        train_data = lgb.Dataset(X_train, y_train)
        valid_data = lgb.Dataset(X_valid, y_valid, reference=train_data)
        model = lgb.train(params, train_data, valid_sets=[valid_data], num_boost_round=1000, callbacks=[lgb.early_stopping(50), lgb.log_evaluation(0)])
        y_pred = model.predict(X_valid)
        rmse = root_mean_squared_error(y_valid, y_pred)
        rmses.append(rmse)
    return np.mean(rmses)

## 4. モデルの学習とハイパーパラメータチューニング

In [14]:
%%time
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=200)

[I 2025-06-21 15:03:11,191] A new study created in memory with name: no-name-88266463-aad4-4507-baba-38801c372fd6


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[125]	valid_0's rmse: 7.19483
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[154]	valid_0's rmse: 19.5326
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.177
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 13.4907
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:03:19,389] Trial 0 finished with value: 11.466044223847428 and parameters: {'num_leaves': 68, 'learning_rate': 0.07018908504325128, 'feature_fraction': 0.9737891371331779, 'bagging_fraction': 0.67878004991326, 'bagging_freq': 7, 'min_child_samples': 78}. Best is trial 0 with value: 11.466044223847428.


Early stopping, best iteration is:
[228]	valid_0's rmse: 5.93511
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[40]	valid_0's rmse: 7.63082
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[32]	valid_0's rmse: 19.8278
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.3725
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 13.4898
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:03:25,454] Trial 1 finished with value: 11.675752134283288 and parameters: {'num_leaves': 79, 'learning_rate': 0.19022229392841394, 'feature_fraction': 0.8428230401053235, 'bagging_fraction': 0.766249268310335, 'bagging_freq': 3, 'min_child_samples': 24}. Best is trial 0 with value: 11.466044223847428.


Early stopping, best iteration is:
[70]	valid_0's rmse: 6.05789
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[154]	valid_0's rmse: 7.04219
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[176]	valid_0's rmse: 18.7621
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.296
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 12.4711
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:03:32,326] Trial 2 finished with value: 11.065542044290794 and parameters: {'num_leaves': 35, 'learning_rate': 0.07492759888976204, 'feature_fraction': 0.7423968779713126, 'bagging_fraction': 0.8599792857740763, 'bagging_freq': 1, 'min_child_samples': 13}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[323]	valid_0's rmse: 5.75636
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[42]	valid_0's rmse: 7.35399
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[69]	valid_0's rmse: 19.6695
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.861
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 13.6234
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:03:35,044] Trial 3 finished with value: 11.681698022217018 and parameters: {'num_leaves': 26, 'learning_rate': 0.21731377886220057, 'feature_fraction': 0.9792185480699902, 'bagging_fraction': 0.8613841200206078, 'bagging_freq': 5, 'min_child_samples': 62}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[58]	valid_0's rmse: 5.90065
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[115]	valid_0's rmse: 7.29747
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[80]	valid_0's rmse: 19.0382
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.223
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 13.5802
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:03:41,881] Trial 4 finished with value: 11.426165744863258 and parameters: {'num_leaves': 83, 'learning_rate': 0.17701685948028695, 'feature_fraction': 0.8174530110703594, 'bagging_fraction': 0.8529094420183482, 'bagging_freq': 2, 'min_child_samples': 61}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[61]	valid_0's rmse: 5.99196
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[243]	valid_0's rmse: 7.1616
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[187]	valid_0's rmse: 19.5945
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.2473
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 12.4805
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:03:49,618] Trial 5 finished with value: 11.259954837291254 and parameters: {'num_leaves': 34, 'learning_rate': 0.045321092646612826, 'feature_fraction': 0.6170683782690959, 'bagging_fraction': 0.6611115693283852, 'bagging_freq': 3, 'min_child_samples': 91}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[486]	valid_0's rmse: 5.81597
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[86]	valid_0's rmse: 7.56971
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[52]	valid_0's rmse: 19.774
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.3016
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 13.5227
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:03:55,754] Trial 6 finished with value: 11.637334843787874 and parameters: {'num_leaves': 66, 'learning_rate': 0.1663841692798393, 'feature_fraction': 0.9861707583074992, 'bagging_fraction': 0.6662001700822167, 'bagging_freq': 4, 'min_child_samples': 27}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[87]	valid_0's rmse: 6.01872
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[822]	valid_0's rmse: 7.18556
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[569]	valid_0's rmse: 19.7478
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.1223
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[13]	valid_0's rmse: 13.2733
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:04:25,113] Trial 7 finished with value: 11.453897250713231 and parameters: {'num_leaves': 80, 'learning_rate': 0.010533846943643984, 'feature_fraction': 0.8344849291712476, 'bagging_fraction': 0.8056205577134862, 'bagging_freq': 7, 'min_child_samples': 71}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[752]	valid_0's rmse: 5.94059
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[76]	valid_0's rmse: 7.32537
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[106]	valid_0's rmse: 19.6092
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.1934
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 13.4931
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:04:31,578] Trial 8 finished with value: 11.517142316272679 and parameters: {'num_leaves': 84, 'learning_rate': 0.10731101766531921, 'feature_fraction': 0.8357902024785877, 'bagging_fraction': 0.6446311768408316, 'bagging_freq': 10, 'min_child_samples': 76}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[87]	valid_0's rmse: 5.96463
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[88]	valid_0's rmse: 7.29537
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[58]	valid_0's rmse: 19.3662
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.5096
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 11.4884
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:04:37,888] Trial 9 finished with value: 11.158134355417216 and parameters: {'num_leaves': 51, 'learning_rate': 0.12646015267621144, 'feature_fraction': 0.6108922053263736, 'bagging_fraction': 0.992619268764923, 'bagging_freq': 6, 'min_child_samples': 44}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[170]	valid_0's rmse: 6.13108
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[26]	valid_0's rmse: 7.47517
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[50]	valid_0's rmse: 19.2809
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 12.0784
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 11.7622
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:04:41,361] Trial 10 finished with value: 11.462879135458387 and parameters: {'num_leaves': 46, 'learning_rate': 0.2872343919357754, 'feature_fraction': 0.715676011672338, 'bagging_fraction': 0.9642213306820715, 'bagging_freq': 1, 'min_child_samples': 14}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[29]	valid_0's rmse: 6.7178
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[99]	valid_0's rmse: 7.25915
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[94]	valid_0's rmse: 19.4632
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.4484
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 11.7543
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:04:47,415] Trial 11 finished with value: 11.159070470509231 and parameters: {'num_leaves': 46, 'learning_rate': 0.11161471527895528, 'feature_fraction': 0.6046601356162318, 'bagging_fraction': 0.9971407720836861, 'bagging_freq': 7, 'min_child_samples': 42}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[189]	valid_0's rmse: 5.87027
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[220]	valid_0's rmse: 7.40785
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[98]	valid_0's rmse: 19.2798
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.3451
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 12.5299
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:04:53,931] Trial 12 finished with value: 11.259454989379586 and parameters: {'num_leaves': 48, 'learning_rate': 0.10842010863554838, 'feature_fraction': 0.7177797593889603, 'bagging_fraction': 0.924288433118858, 'bagging_freq': 9, 'min_child_samples': 43}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[108]	valid_0's rmse: 5.73459
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[397]	valid_0's rmse: 6.96127
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[179]	valid_0's rmse: 19.2883
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.2888
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 12.6377
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:05:02,483] Trial 13 finished with value: 11.186511090031292 and parameters: {'num_leaves': 34, 'learning_rate': 0.0703312888950475, 'feature_fraction': 0.6957190711250911, 'bagging_fraction': 0.9081118188079677, 'bagging_freq': 5, 'min_child_samples': 11}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[246]	valid_0's rmse: 5.75642
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[93]	valid_0's rmse: 7.26931
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[136]	valid_0's rmse: 19.0135
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.2835
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[5]	valid_0's rmse: 13.4728
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:05:05,855] Trial 14 finished with value: 11.354468216528595 and parameters: {'num_leaves': 20, 'learning_rate': 0.13219235006945226, 'feature_fraction': 0.7564387543452658, 'bagging_fraction': 0.7365074906611639, 'bagging_freq': 1, 'min_child_samples': 39}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[200]	valid_0's rmse: 5.73319
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[57]	valid_0's rmse: 7.48565
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[20]	valid_0's rmse: 19.6606
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.7949
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 11.6314
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:05:12,649] Trial 15 finished with value: 11.408447984716375 and parameters: {'num_leaves': 97, 'learning_rate': 0.22132715744350157, 'feature_fraction': 0.6589342881765446, 'bagging_fraction': 0.8879919792070974, 'bagging_freq': 6, 'min_child_samples': 32}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[71]	valid_0's rmse: 6.46971
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[157]	valid_0's rmse: 7.16681
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[126]	valid_0's rmse: 19.3705
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.191
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 13.4036
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:05:20,971] Trial 16 finished with value: 11.38662431765608 and parameters: {'num_leaves': 56, 'learning_rate': 0.06332565138632464, 'feature_fraction': 0.7713224327720228, 'bagging_fraction': 0.9513609415113331, 'bagging_freq': 8, 'min_child_samples': 50}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[201]	valid_0's rmse: 5.80119
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[645]	valid_0's rmse: 7.09305
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[700]	valid_0's rmse: 19.0645
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.1269
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's rmse: 13.3543
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:05:41,362] Trial 17 finished with value: 11.279988407786373 and parameters: {'num_leaves': 37, 'learning_rate': 0.016930539888887555, 'feature_fraction': 0.9028503998826625, 'bagging_fraction': 0.8155863864662646, 'bagging_freq': 4, 'min_child_samples': 20}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[921]	valid_0's rmse: 5.76125
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[70]	valid_0's rmse: 7.53106
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[41]	valid_0's rmse: 19.6099
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.3248
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[9]	valid_0's rmse: 12.0462
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:05:47,046] Trial 18 finished with value: 11.30258869460537 and parameters: {'num_leaves': 55, 'learning_rate': 0.141721059328024, 'feature_fraction': 0.6627936867575388, 'bagging_fraction': 0.7307209798200476, 'bagging_freq': 6, 'min_child_samples': 54}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[191]	valid_0's rmse: 6.00101
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[126]	valid_0's rmse: 7.0979
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[157]	valid_0's rmse: 19.3704
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.201
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 13.4351
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:05:52,758] Trial 19 finished with value: 11.402810753018208 and parameters: {'num_leaves': 40, 'learning_rate': 0.08896890624009572, 'feature_fraction': 0.8853704632294327, 'bagging_fraction': 0.6012642745857747, 'bagging_freq': 3, 'min_child_samples': 34}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[157]	valid_0's rmse: 5.90962
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[116]	valid_0's rmse: 7.65485
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[61]	valid_0's rmse: 19.2136
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 12.2853
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 12.3309
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:05:56,082] Trial 20 finished with value: 11.54167400394513 and parameters: {'num_leaves': 27, 'learning_rate': 0.26431501573433835, 'feature_fraction': 0.6485144883351844, 'bagging_fraction': 0.9930367959336219, 'bagging_freq': 8, 'min_child_samples': 98}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[106]	valid_0's rmse: 6.22366
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[77]	valid_0's rmse: 7.23931
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[96]	valid_0's rmse: 19.4248
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.4852
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[9]	valid_0's rmse: 11.7778
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:06:02,398] Trial 21 finished with value: 11.166244701720451 and parameters: {'num_leaves': 49, 'learning_rate': 0.12072506424433155, 'feature_fraction': 0.626127216078305, 'bagging_fraction': 0.9984924000703088, 'bagging_freq': 7, 'min_child_samples': 44}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[168]	valid_0's rmse: 5.90418
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[158]	valid_0's rmse: 7.16223
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[257]	valid_0's rmse: 19.4538
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.2216
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[9]	valid_0's rmse: 12.4605
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:06:17,300] Trial 22 finished with value: 11.209099061508327 and parameters: {'num_leaves': 63, 'learning_rate': 0.04265332275090439, 'feature_fraction': 0.6157301858893564, 'bagging_fraction': 0.9509881470152426, 'bagging_freq': 8, 'min_child_samples': 47}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[560]	valid_0's rmse: 5.74739
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[76]	valid_0's rmse: 7.23364
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[112]	valid_0's rmse: 19.6388
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.4073
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 11.9455
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:06:23,689] Trial 23 finished with value: 11.242557968438495 and parameters: {'num_leaves': 43, 'learning_rate': 0.09551868855345487, 'feature_fraction': 0.6029027633080397, 'bagging_fraction': 0.9250645542324114, 'bagging_freq': 6, 'min_child_samples': 36}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[236]	valid_0's rmse: 5.9876
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[64]	valid_0's rmse: 7.29959
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[67]	valid_0's rmse: 19.1142
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.5347
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 12.0179
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:06:28,994] Trial 24 finished with value: 11.215668106913109 and parameters: {'num_leaves': 53, 'learning_rate': 0.1507759893211375, 'feature_fraction': 0.6884282066436532, 'bagging_fraction': 0.8649864664392479, 'bagging_freq': 5, 'min_child_samples': 19}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[96]	valid_0's rmse: 6.11186
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[152]	valid_0's rmse: 7.18771
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[161]	valid_0's rmse: 19.0867
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.3399
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 12.6436
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:06:33,708] Trial 25 finished with value: 11.229498092008932 and parameters: {'num_leaves': 30, 'learning_rate': 0.08418425341624833, 'feature_fraction': 0.7509106693230787, 'bagging_fraction': 0.9662635638952843, 'bagging_freq': 10, 'min_child_samples': 61}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[125]	valid_0's rmse: 5.88965
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[60]	valid_0's rmse: 7.23607
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[56]	valid_0's rmse: 19.4359
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.1958
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 13.5114
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:06:40,293] Trial 26 finished with value: 11.49360143244527 and parameters: {'num_leaves': 71, 'learning_rate': 0.1264887270994555, 'feature_fraction': 0.7845814404063123, 'bagging_fraction': 0.8355886092494547, 'bagging_freq': 4, 'min_child_samples': 30}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[89]	valid_0's rmse: 6.08891
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[176]	valid_0's rmse: 7.11022
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[140]	valid_0's rmse: 19.4294
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.1791
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 12.8403
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:06:49,126] Trial 27 finished with value: 11.265690000864232 and parameters: {'num_leaves': 59, 'learning_rate': 0.053699866739244206, 'feature_fraction': 0.6445596498193762, 'bagging_fraction': 0.9114646368582626, 'bagging_freq': 9, 'min_child_samples': 53}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[231]	valid_0's rmse: 5.76943
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[48]	valid_0's rmse: 7.44685
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[49]	valid_0's rmse: 19.296
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.4257
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 11.8738
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:06:52,759] Trial 28 finished with value: 11.25639655435475 and parameters: {'num_leaves': 40, 'learning_rate': 0.1877224026623449, 'feature_fraction': 0.6816813618343006, 'bagging_fraction': 0.7735113985672946, 'bagging_freq': 7, 'min_child_samples': 69}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[59]	valid_0's rmse: 6.23963
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[517]	valid_0's rmse: 7.0371
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[416]	valid_0's rmse: 19.6353
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 11.1828
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's rmse: 13.0942
Training until validation scores don't improve for 50 rounds


[I 2025-06-21 15:07:01,393] Trial 29 finished with value: 11.344784664461605 and parameters: {'num_leaves': 20, 'learning_rate': 0.02980950845681874, 'feature_fraction': 0.73353686833303, 'bagging_fraction': 0.9929744550513105, 'bagging_freq': 7, 'min_child_samples': 40}. Best is trial 2 with value: 11.065542044290794.


Early stopping, best iteration is:
[700]	valid_0's rmse: 5.7745
Best params: {'num_leaves': 35, 'learning_rate': 0.07492759888976204, 'feature_fraction': 0.7423968779713126, 'bagging_fraction': 0.8599792857740763, 'bagging_freq': 1, 'min_child_samples': 13}
Best CV RMSE: 11.065542044290794


In [18]:
print('LightGBM：ベイズ最適化の結果')
print('Best params:', study.best_params)
print('Best CV RMSE:', study.best_value)
best_params = study.best_params
best_params.update({
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'verbose': -1,
    'random_state': 42
})
train_data = lgb.Dataset(X, y)
best_model = lgb.train(best_params, train_data, num_boost_round=1000)

{'num_leaves': 35,
 'learning_rate': 0.07492759888976204,
 'feature_fraction': 0.7423968779713126,
 'bagging_fraction': 0.8599792857740763,
 'bagging_freq': 1,
 'min_child_samples': 13,
 'objective': 'regression',
 'metric': 'rmse',
 'boosting_type': 'gbdt',
 'verbose': -1,
 'random_state': 42}

履歴：ハイパーパラメータの設定  
ランダムフォレスト：グリッドサーチの結果  
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}

Best parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}  
Best score: 19.873329344971545  

LightGBM：ベイズ最適化の結果
Best params: {'num_leaves': 35, 'learning_rate': 0.07492759888976204, 'feature_fraction': 0.7423968779713126, 'bagging_fraction': 0.8599792857740763, 'bagging_freq': 1, 'min_child_samples': 13}
Best CV RMSE: 11.065542044290794

## 5. 最適なモデルでの予測と評価

In [15]:
# 特徴量重要度で下位20%を除外し再学習
importances = best_model.feature_importance(importance_type='gain')
threshold = np.percentile(importances, 20)
selected_features = [f for f, imp in zip(feature_cols, importances) if imp > threshold]
print('Selected features:', selected_features)
train_data_selected = lgb.Dataset(X[selected_features], y)
best_model = lgb.train(best_params, train_data_selected, num_boost_round=1000)


Selected features: ['generation_biomass', 'generation_fossil_brown_coal/lignite', 'generation_fossil_gas', 'generation_fossil_hard_coal', 'generation_fossil_oil', 'generation_hydro_pumped_storage_consumption', 'generation_hydro_run_of_river_and_poundage', 'generation_hydro_water_reservoir', 'generation_nuclear', 'generation_other', 'generation_other_renewable', 'generation_solar', 'generation_waste', 'generation_wind_onshore', 'total_load_actual', 'valencia_temp', 'valencia_temp_min', 'valencia_temp_max', 'valencia_pressure', 'valencia_humidity', 'valencia_wind_speed', 'valencia_wind_deg', 'valencia_clouds_all', 'valencia_weather_icon', 'madrid_temp', 'madrid_temp_min', 'madrid_temp_max', 'madrid_pressure', 'madrid_humidity', 'madrid_wind_speed', 'madrid_wind_deg', 'madrid_clouds_all', 'madrid_weather_id', 'madrid_weather_icon', 'bilbao_temp', 'bilbao_temp_min', 'bilbao_temp_max', 'bilbao_pressure', 'bilbao_humidity', 'bilbao_wind_speed', 'bilbao_wind_deg', 'bilbao_clouds_all', 'bilbao

## 6. テストデータへの予測と保存

In [16]:
# テストデータ予測と提出ファイル出力（フォーマット厳守）
X_test = test[selected_features]
test_pred = best_model.predict(X_test)
submission = test[['time']].copy()
submission['price_actual_pred'] = test_pred
assert submission.iloc[0,0] == '2018-01-01 00:00:00+01:00', '1行1列目が要件を満たしません'
submission.to_csv(DATA_DIR / 'submission_lightgbm.csv', index=False, header=False)
print('Saved: submission_lightgbm.csv')

Saved: submission_lightgbm.csv


In [17]:
submission

Unnamed: 0,time,price_actual_pred
0,2018-01-01 00:00:00+01:00,34.088907
1,2018-01-01 01:00:00+01:00,33.004579
2,2018-01-01 02:00:00+01:00,35.177557
3,2018-01-01 03:00:00+01:00,34.863520
4,2018-01-01 04:00:00+01:00,34.141503
...,...,...
8755,2018-12-31 19:00:00+01:00,64.165839
8756,2018-12-31 20:00:00+01:00,67.592891
8757,2018-12-31 21:00:00+01:00,60.217003
8758,2018-12-31 22:00:00+01:00,55.172967
