In [4]:
import sys

import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline

# The project-local module lives in ../src, so extend the path before importing it.
sys.path.append('../src')

from stacking_transformer import RegressionStackingTransformer

In [5]:
# Demo data: the California housing regression set.
dataset = fetch_california_housing()
X = dataset.data
y = dataset.target

# Hold out 20% of the rows as a test split (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

In [7]:
# First-level estimators: two shallow tree ensembles that share the same
# hyper-parameters, plus a k-nearest-neighbours regressor.
tree_params = dict(random_state=0, n_jobs=-1, n_estimators=100, max_depth=3)
estimators_L1 = [
    ('et', ExtraTreesRegressor(**tree_params)),
    ('rf', RandomForestRegressor(**tree_params)),
    ('knn', KNeighborsRegressor(n_neighbors=10)),
]

# Stacking transformer built over the first-level estimators.
n_folds = 5
stack = RegressionStackingTransformer(
    estimators=estimators_L1,
    n_folds=n_folds,
    shuffle=True,
    random_state=0,
    verbose=1,
)

# Fit on the training split, then transform both splits; the transformed
# arrays are the inputs of the second-level model below.
stack.fit(X_train, y_train)
S_train = stack.transform(X_train)
S_test = stack.transform(X_test)

# Second-level estimator: trained on the transformed training data and
# used to produce the final prediction for the test split.
estimator_L2 = GradientBoostingRegressor(
    random_state=0,
    learning_rate=0.1,
    n_estimators=100,
    max_depth=3,
).fit(S_train, y_train)
y_pred = estimator_L2.predict(S_test)

# Final prediction score: mean absolute error on the test split.
final_score = mean_absolute_error(y_test, y_pred)
print('Final score: [%.8f]' % final_score)

metric:  mean_squared_error 
 n_estimators:  3 

Estimator: [et: ExtraTreesRegressor]
Mean Scores: [0.72550959]  -  Std Scrores: [0.02120710]

Estimator: [rf: RandomForestRegressor]
Mean Scores: [0.58133018]  -  Std Scrores: [0.02674405]

Estimator: [knn: KNeighborsRegressor]
Mean Scores: [1.14589167]  -  Std Scrores: [0.03048826]

Train set was detected.
Final score: [0.52926671]


In [12]:
# First-level estimators handed to the stacking transformer: two shallow
# tree ensembles with identical hyper-parameters and a k-NN regressor.
estimators_l1 = [
    ('et', ExtraTreesRegressor(
        random_state=0,
        n_jobs=-1,
        n_estimators=100,
        max_depth=3,
    )),
    ('rf', RandomForestRegressor(
        random_state=0,
        n_jobs=-1,
        n_estimators=100,
        max_depth=3,
    )),
    ('knn', KNeighborsRegressor(n_neighbors=10)),
]

# Stacking step. NOTE: unlike the earlier stacking cell (n_folds=5, no
# n_jobs), this one leaves n_folds at the transformer's default and passes
# n_jobs=-1, so the per-estimator scores it reports need not match exactly.
stack_l1 = RegressionStackingTransformer(
    estimators=estimators_l1,
    shuffle=True,
    random_state=0,
    verbose=1,
    n_jobs=-1,
)

# Two-step pipeline: the stacking transformer feeds a linear regression
# that produces the final prediction.
pipeline = Pipeline([
    ('stack_l1', stack_l1),
    ("final_pred", LinearRegression()),
])

In [13]:
# Fit the two-step pipeline (stack_l1, then final_pred) on the training split.
pipeline.fit(X_train, y_train)

metric:  mean_squared_error 
 n_estimators:  3 

Estimator: [et: ExtraTreesRegressor]
Mean Scores: [0.72514138]  -  Std Scrores: [0.01519959]

Estimator: [rf: RandomForestRegressor]
Mean Scores: [0.58205053]  -  Std Scrores: [0.03158254]

Estimator: [knn: KNeighborsRegressor]
Mean Scores: [1.15414639]  -  Std Scrores: [0.02528988]

Train set was detected.


In [14]:
# Predict the held-out test split with the fitted pipeline.
y_pred_test = pipeline.predict(X_test)

In [15]:
# Baseline: a plain LinearRegression trained directly on the raw features,
# to compare against the stacking pipeline's test predictions.
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred_test_lr = lr.predict(X_test)

# Mean absolute percentage error on the test split, scaled to percent.
mape_stacking = 100 * mean_absolute_percentage_error(y_test, y_pred_test)
mape_baseline = 100 * mean_absolute_percentage_error(y_test, y_pred_test_lr)

print(f"MAPE with stacking: % {mape_stacking:.2f}")
print(f"MAPE without stacking: % {mape_baseline:.2f}")

MAPE with stacking: % 35.70
MAPE without stacking: % 32.33
