In [2]:
import warnings
import pandas as pd
from sklearn.datasets import fetch_california_housing
from lightgbm import LGBMRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Suppress warnings from this point on: the 'ignore' action is installed
# without any message/category/module restriction, so it matches every
# warning raised by the code below (including library deprecation notices).
warnings.filterwarnings('ignore')

# Dataset: fetch California housing and assemble a single DataFrame holding
# the feature columns plus the target column.
target_col = 'MedHouseVal'
cali = fetch_california_housing()
df = pd.DataFrame(data=cali.data, columns=cali.feature_names).assign(
    **{target_col: cali.target}
)

# Separate the target from the predictors; df itself is left untouched.
y = df[target_col]
X = df.drop(columns=[target_col])

# Train/test split: hold out 20% of the rows for testing; the fixed
# random_state keeps the partition the same from run to run.
split_kwargs = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

# LightGBM Regressor: gather the hyperparameters in one mapping, then build
# the estimator from it.
lgbm_params = {
    'n_estimators': 500,
    'learning_rate': 0.05,
    'max_depth': 6,
    'reg_lambda': 1.0,
    'random_state': 100,
    'verbose': -1,
}
reg = LGBMRegressor(**lgbm_params)

# Train on the training split, then predict on the held-out test split.
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)

# Evaluation: score the test-set predictions. RMSE is the square root of MSE.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)

# Report the metrics one per line, followed by a blank separator line.
print("LightGBM Results")
metrics = {"MSE": mse, "RMSE": rmse, "R² Score": r2}
for label, value in metrics.items():
    print(f"{label}:", value)
print()

# Feature Importance
# Print one "<feature> : <importance>" line per feature. The importance array
# is read from the fitted model once (instead of once per loop iteration) and
# paired with the names via zip. This relies on the importances being in the
# same order as cali.feature_names, which is the column order of the
# DataFrame the model was trained on.
print("[ Feature importance ]")
importances = reg.feature_importances_
for feature, importance in zip(cali.feature_names, importances):
    print(f"{feature} : {importance}")


LightGBM Results
MSE: 0.20104002732065612
RMSE: 0.44837487365000295
R² Score: 0.8465822268781054

[ Feature importance ]
MedInc : 1935
HouseAge : 1346
AveRooms : 1412
AveBedrms : 1238
Population : 1377
AveOccup : 1926
Latitude : 2704
Longitude : 2802
