In [1]:

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
import numpy as np
import sys
sys.path.append('../src')

from stacking_transformer import RegressionStackingTransformer

In [8]:

X, y = make_regression(n_samples=3000, n_features=10, n_informative=2, noise=0.2)
# Make train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2,random_state=123)

In [11]:

import time

start = time.time()


estimators_L1 = [
    ('m1', LinearRegression()),
    ('m2', ExtraTreesRegressor()),
    ('m3', RandomForestRegressor()),
    ('m4', GradientBoostingRegressor()),
    ('m5', KNeighborsRegressor()),
    ('m6', Lasso()),
    ("m7", Ridge())
]

# Stacking
n_folds = 5
stack = RegressionStackingTransformer(
    estimators=estimators_L1,
    shuffle=True,
    random_state=0,
    verbose=1,
    n_folds=n_folds,
)

stack.fit(X_train, y_train)
S_train = stack.transform(X_train)
S_test = stack.transform(X_test)
# # Use 2nd level estimator to get final prediction
estimator_L2 = LinearRegression()

estimator_L2 = estimator_L2.fit(S_train, y_train)
y_pred = estimator_L2.predict(S_test)

# Final prediction score
print("*"*20)
print('Final score: [%.8f]' % mean_absolute_error(y_test, y_pred))

end = time.time()
print("Total fit and predict time: ", end - start)

metric:  [mean_absolute_error] 
 n_estimators:  [7] 

Estimator  0: [m1: LinearRegression]
Mean Scores: [0.15814622]  -  Std Scrores: [0.00406993]

Estimator  1: [m2: ExtraTreesRegressor]
Mean Scores: [0.50871496]  -  Std Scrores: [0.04327420]

Estimator  2: [m3: RandomForestRegressor]
Mean Scores: [0.85871835]  -  Std Scrores: [0.06987607]

Estimator  3: [m4: GradientBoostingRegressor]
Mean Scores: [0.96144932]  -  Std Scrores: [0.06123278]

Estimator  4: [m5: KNeighborsRegressor]
Mean Scores: [18.47093793]  -  Std Scrores: [0.80020844]

Estimator  5: [m6: Lasso]
Mean Scores: [1.11113198]  -  Std Scrores: [0.04522909]

Estimator  6: [m7: Ridge]
Mean Scores: [0.15897775]  -  Std Scrores: [0.00425514]

Train set was detected.
********************
Final score: [0.16620016]
Total fit and predict time:  23.981504917144775


In [12]:
# Parallel

start = time.time()

estimators_L1 = [
    ('m1', LinearRegression()),
    ('m2', ExtraTreesRegressor()),
    ('m3', RandomForestRegressor()),
    ('m4', GradientBoostingRegressor()),
    ('m5', KNeighborsRegressor()),
    ('m6', Lasso()),
    ("m7", Ridge())
]

# Stacking
n_folds = 5
stack_p = RegressionStackingTransformer(
    estimators=estimators_L1,
    shuffle=True,
    random_state=0,
    verbose=1,
    n_folds=n_folds,
    njobs=-1
)

stack_p.fit(X_train, y_train)
S_train_p = stack_p.transform(X_train)
S_test_p = stack_p.transform(X_test)

# # Use 2nd level estimator to get final prediction
estimator_L2 = LinearRegression()

estimator_L2 = estimator_L2.fit(S_train_p, y_train)
y_pred_p = estimator_L2.predict(S_test_p)

# Final prediction score
print("*"*20)
print('Final score: [%.8f]' % mean_absolute_error(y_test, y_pred_p))

end = time.time()
print("Total fit and predict time: ", end - start)

metric: [mean_absolute_error] 
 n_estimators: [7] 



[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  35 out of  35 | elapsed:    7.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Estimator: [m1: LinearRegression]
Mean Scores: [0.15814622]  -  Std Scrores: [0.00406993]

Estimator: [m2: ExtraTreesRegressor]
Mean Scores: [0.51007961]  -  Std Scrores: [0.05274760]

Estimator: [m3: RandomForestRegressor]
Mean Scores: [0.85552756]  -  Std Scrores: [0.07235703]

Estimator: [m4: GradientBoostingRegressor]
Mean Scores: [0.95941172]  -  Std Scrores: [0.05950958]

Estimator: [m5: KNeighborsRegressor]
Mean Scores: [18.47093793]  -  Std Scrores: [0.80020844]

Estimator: [m6: Lasso]
Mean Scores: [1.11113198]  -  Std Scrores: [0.04522909]

Estimator: [m7: Ridge]
Mean Scores: [0.15897775]  -  Std Scrores: [0.00425514]

Train set was detected.


[Parallel(n_jobs=-1)]: Done  35 out of  35 | elapsed:    2.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


********************
Final score: [0.16543262]
Total fit and predict time:  13.064667224884033


[Parallel(n_jobs=-1)]: Done  35 out of  35 | elapsed:    2.8s finished
