In [1]:
import warnings
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Suppress all warnings for the rest of the session: the 'ignore' action is
# installed without any category/module restriction.
warnings.filterwarnings('ignore')

# Dataset: fetch the California housing data and assemble a single frame
# holding the feature columns plus the regression target.
cali = fetch_california_housing()
target_col = 'MedHouseVal'
df = pd.DataFrame(cali.data, columns=cali.feature_names)
df[target_col] = cali.target

# Separate the target series from the feature matrix.
y = df[target_col]
X = df.drop(columns=target_col)

# Train/test split: 20% of the rows are held out, with a fixed seed so the
# partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# RandomForest regressor: hyperparameters are collected in one mapping so
# they can be read (and tuned) in a single place.
forest_params = {
    "n_estimators": 200,   # number of trees
    "max_depth": 10,       # cap on tree depth
    "random_state": 100,   # fixed seed for the forest
    "n_jobs": -1,
}
reg = RandomForestRegressor(**forest_params)

# Fit on the training split, then predict the held-out split.
y_pred = reg.fit(X_train, y_train).predict(X_test)

# Evaluation: compare the held-out predictions against the true targets.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is derived from the MSE computed above

# Report each metric as "<label> <value>", followed by a blank line.
print("RandomForest Results")
for label, score in (("MSE:", mse), ("RMSE:", rmse), ("R² Score:", r2)):
    print(label, score)
print()

# Feature importance: print one "<feature> : <importance>" line per input
# feature, in the dataset's original feature order.
print("[ Feature importance ]")
# On scikit-learn forests `feature_importances_` is a computed property
# (aggregated from the fitted trees on access), so read it once here instead
# of re-evaluating it for every feature inside the loop.
importances = reg.feature_importances_
for feature, importance in zip(cali.feature_names, importances):
    print(f"{feature} : {importance}")


RandomForest Results
MSE: 0.29368904749934516
RMSE: 0.5419308512156742
R² Score: 0.7758798570705804

[ Feature importance ]
MedInc : 0.5928523306544012
HouseAge : 0.047449026182304774
AveRooms : 0.02988616447090051
AveBedrms : 0.01678397754140077
Population : 0.017201773504411835
AveOccup : 0.13961700991670345
Latitude : 0.07810556117163744
Longitude : 0.07810415655824006
