<font color="#CC3D3D"><p>
# Implementing BaggingRegressor as a custom wrapper class in scikit-learn

In [3]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.exceptions import NotFittedError
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.utils import check_random_state, resample

In [4]:
# Load the preprocessed feature table (without its ID column) and the Salary target
X = pd.read_csv('X_train_preprocessed.csv').drop('ID', axis=1)
y = pd.read_csv('y_train_2.csv')['Salary']

# Hold out 40% of the rows for testing; the fixed seed pins the split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=0
)

In [5]:
# Baseline: fit a single DecisionTreeRegressor and predict on the held-out split
tree = DecisionTreeRegressor(random_state=0).fit(X_train, y_train)
tree_pred = tree.predict(X_test)

# Regression metric: RMSE, obtained as the square root of the mean squared error
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=tree_pred))
print("RMSE:", rmse)

RMSE: 1233.027807443444


##### 단일 결정 트리(DecisionTreeRegressor)만 사용하면 성능이 어떻게 나올까?
##### 회귀 문제이므로 predict로 예측값을 구하고, 평가 지표로는 RMSE를 많이 사용한다.
##### sklearn의 mean_squared_error는 MSE를 반환하므로, 그 값에 제곱근을 취하면 RMSE를 계산할 수 있다.
##### 
    
    

In [6]:
class MyBaggingRegressor(BaseEstimator, RegressorMixin):
    """Bagging ensemble of decision-tree regressors.

    Every tree is fitted on its own bootstrap resample of the training
    data, and the ensemble prediction is the unweighted mean of the
    per-tree predictions.

    Parameters
    ----------
    n_estimators : int, default=10
        Number of trees to fit. Must be at least 1.
    random_state : int, RandomState instance or None, default=None
        Seed or generator that drives both the bootstrap resampling and the
        trees. ``None`` draws from NumPy's global random state, which is
        what the class did before this parameter existed.

    Attributes
    ----------
    estimators : list of DecisionTreeRegressor
        The fitted trees; empty until ``fit`` has been called.
    """

    def __init__(self, n_estimators=10, random_state=None):
        self.n_estimators = n_estimators
        self.random_state = random_state
        self.estimators = []

    def fit(self, X, y):
        """Fit ``n_estimators`` trees, each on a bootstrap resample of (X, y).

        Returns
        -------
        self : MyBaggingRegressor
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``n_estimators`` is smaller than 1.
        """
        if self.n_estimators < 1:
            raise ValueError(
                f"n_estimators must be at least 1, got {self.n_estimators!r}."
            )

        # One generator shared by resampling and tree building: a fixed
        # random_state then reproduces the whole ensemble.
        rng = check_random_state(self.random_state)

        fitted = []
        for _ in range(self.n_estimators):
            X_boot, y_boot = resample(X, y, random_state=rng)
            fitted.append(DecisionTreeRegressor(random_state=rng).fit(X_boot, y_boot))

        # Assign only after every tree has been fitted, so a failure midway
        # does not leave a partially built ensemble behind.
        self.estimators = fitted
        return self

    def predict(self, X):
        """Return the mean of the fitted trees' predictions for each row of X.

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called yet.
        """
        if not self.estimators:
            raise NotFittedError(
                "This MyBaggingRegressor instance is not fitted yet. "
                "Call 'fit' before using 'predict'."
            )

        # One column per tree; size comes from the trees actually fitted,
        # not from the current value of n_estimators.
        per_tree = np.column_stack([tree.predict(X) for tree in self.estimators])
        return per_tree.mean(axis=1)

In [7]:
# Fit the custom bagging ensemble, then predict on the held-out split
bagging = MyBaggingRegressor(n_estimators=10)
bagging.fit(X_train, y_train)
bagging_pred = bagging.predict(X_test)

# Regression metric: RMSE, obtained as the square root of the mean squared error
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=bagging_pred))
print("RMSE:", rmse)

RMSE: 901.9296492435932


<font color="#CC3D3D"><p>
# End