In [4]:
import warnings
import pandas as pd
from sklearn.datasets import fetch_california_housing
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Silence every warning category for the remainder of the session.
warnings.filterwarnings(action='ignore')

# Dataset: California housing prices.
cali = fetch_california_housing()
target_col = 'MedHouseVal'

# Feature columns plus the regression target appended as the last column.
df = pd.DataFrame(data=cali.data, columns=cali.feature_names).assign(
    **{target_col: cali.target}
)

# Features are every column except the target; the target is its own Series.
y = df[target_col]
X = df.drop(columns=[target_col])

# Train/test split: 20% of the rows are held out, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# XGBoost regressor hyperparameters, gathered in one mapping.
xgb_params = {
    "n_estimators": 500,               # number of trees
    "learning_rate": 0.05,             # learning rate
    "max_depth": 6,                    # maximum tree depth
    "reg_lambda": 1,                   # L2 regularization
    "objective": "reg:squarederror",   # regression objective function
    "random_state": 100,
    "eval_metric": "rmse",
}

# Define the model from the mapping above, then train it on the training split.
reg = XGBRegressor(**xgb_params)
reg.fit(X_train, y_train)

# Predict on the held-out test split.
y_pred = reg.predict(X_test)

# Evaluation: R², MSE, and RMSE (the square root of the MSE).
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Report the metrics in a fixed order, followed by a blank separator line.
for label, value in (("MSE:", mse), ("RMSE:", rmse), ("R² Score:", r2)):
    print(label, value)
print()

# Feature importance: print one "<feature> : <importance>" line per feature.
# Names and importances are walked in lockstep with zip() instead of
# enumerating one sequence and indexing the other, so the importances
# attribute is read once rather than on every iteration.
print("[ Feature importance ]")
for feature, importance in zip(cali.feature_names, reg.feature_importances_):
    print(f"{feature} : {importance}")

MSE: 0.20804038744895811
RMSE: 0.4561144455604954
R² Score: 0.841240108314709

[ Feature importance ]
MedInc : 0.49667760729789734
HouseAge : 0.06738278269767761
AveRooms : 0.03991413861513138
AveBedrms : 0.025820143520832062
Population : 0.022940294817090034
AveOccup : 0.14398936927318573
Latitude : 0.09579730778932571
Longitude : 0.10747838765382767
