<a href="https://colab.research.google.com/github/tsato-code/colab_notebooks/blob/main/multioutputregressor_lightgbm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import lightgbm as lgb
import pandas as pd
import numpy as np
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn import preprocessing

In [2]:
# Load the Boston housing dataset and wrap its feature matrix in a DataFrame
# whose columns carry the dataset's own feature names.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the scikit-learn version this notebook is expected to run on.
boston = load_boston()
df = pd.DataFrame(data=boston.data, columns=boston.feature_names)

In [3]:
# Targets: the dataset's own target (price) plus the CRIM column, which is
# pulled out of the feature table so that it can be predicted as well.
price = boston.target
crim = df['CRIM'].to_numpy()
# Remaining features, i.e. every column except CRIM.
df2 = df.drop(columns=['CRIM'])

In [4]:
# Standardize the features.
# NOTE(review): the scaler is fit on all rows, before the train/test split
# performed in a later cell, so the scaling statistics include the test rows.
sc = preprocessing.StandardScaler()
X = sc.fit_transform(df2)

# Put price and crim side by side to form the two-column target matrix y.
y = np.column_stack([price, crim])

# Check the sizes of the data used for modelling.
print(X.shape, y.shape, sep='\n')

(506, 12)
(506, 2)


In [5]:
# Split into training and test sets.
# A fixed random_state makes the shuffle deterministic, so the split — and
# every prediction and score computed from it — is the same on each
# Restart & Run All instead of changing from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(379, 12) (127, 12)
(379, 2) (127, 2)


In [6]:
# LightGBM Dataset objects built from the train/test split; the evaluation
# set references the training set.
# NOTE(review): y_train / y_test hold both target columns here; lgb.Dataset
# labels are presumably expected to be 1-D — confirm before training on these.
# Neither dataset is referenced by the other cells visible in this notebook.
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

# LightGBM hyperparameters.
# 'regression' is the objective name LightGBM recognises for L2 regression;
# the previous value 'regressor' is not a valid objective name.
lgbm_params = {
    'objective': 'regression'
}

In [7]:
# Build the multi-output model: MultiOutputRegressor wraps a LightGBM regressor
# so that both target columns can be fit and predicted through one object.
mor = MultiOutputRegressor(estimator=lgb.LGBMRegressor())

# Train on the training split.
mor.fit(X_train, y_train)

# Predict both targets for the test split.
y_pred = mor.predict(X_test)

# Show the shape of the predictions, then the first five predicted rows.
print(y_pred.shape, y_pred[:5], sep='\n')

(127, 2)
[[19.66690357  0.93913577]
 [22.3505095   0.13136284]
 [22.54444639  0.17166952]
 [ 9.94058369 14.10038351]
 [17.35378836  0.28635781]]


In [8]:
# RMSE for each target column: column 0 (price) first, then column 1 (crim).
mse1, mse2 = (
    mean_squared_error(y_test[:, col], y_pred[:, col]) for col in range(2)
)
rmse1, rmse2 = np.sqrt(mse1), np.sqrt(mse2)

print(rmse1)
print(rmse2)

3.351356531952076
5.711845739307236
