In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import cross_validate
import tqdm

### Loading data

In [2]:
# Load the three saved arrays from ./data in one pass. Judging by the file
# names these are presumably the base features, a polynomial expansion of
# them, and the regression target -- shapes and dtypes are not checked here.
X, X_polynomial, y = (
    np.load(array_path)
    for array_path in ('./data/X.npy', './data/X_polynomial.npy', './data/y.npy')
)

### Pipelines

In [3]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import HuberRegressor
from sklearn import linear_model
from sklearn import tree
from sklearn import svm
from sklearn.linear_model import PassiveAggressiveRegressor
import xgboost as xgb
from catboost import CatBoostRegressor

In [11]:
# Candidate regressors. The hyperparameter values are hard-coded; presumably
# they come from an earlier search that is not part of this notebook.
huber_reg = HuberRegressor(epsilon=5.09, alpha=0.0004)
ridge_reg = linear_model.Ridge(solver='saga', max_iter=4000, alpha=0.582)

# `normalize=False` dropped: False was already the default, and the argument
# no longer exists in scikit-learn >= 1.2 (passing it raises TypeError).
lasso_reg = linear_model.Lasso(max_iter=4000, alpha=0.0038)

# `max_features=None` (use every feature) is what 'auto' meant for a
# regression tree; the 'auto' spelling was removed in scikit-learn 1.3.
dt_reg = tree.DecisionTreeRegressor(
    min_samples_split=7,
    min_samples_leaf=7,
    min_weight_fraction_leaf=0.000516,
    max_features=None,
)

pa_reg = PassiveAggressiveRegressor(max_iter=3600, tol=1e-3)

# "reg:linear" is the deprecated name of "reg:squarederror". The scikit-learn
# wrapper's own parameter names (learning_rate, reg_alpha) replace the native
# aliases eta / alpha that were previously forwarded through **kwargs.
xgb_reg = xgb.XGBRegressor(
    objective="reg:squarederror",
    reg_alpha=0.00244,
    booster='dart',
    learning_rate=0.017326,
    gamma=0.19504,
    reg_lambda=0.22451,
    max_depth=8,
)

In [12]:
# Candidate list without the XGBoost model. A later cell rebuilds `models`
# with `xgb_reg` included before the stacking loop runs.
models = [
    huber_reg,
    ridge_reg,
    lasso_reg,
    dt_reg,
    pa_reg,
]

### Let's find the best ensemble

In [13]:
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score
from mlxtend.regressor import StackingRegressor
import warnings
# Silence every warning for the rest of the session. NOTE(review): this also
# hides deprecation and convergence warnings raised by the estimators below.
warnings.simplefilter('ignore')

In [45]:
# Candidate regressors for the stacking experiment. The hyperparameter values
# are hard-coded; presumably they come from an earlier search not shown here.
huber_reg = HuberRegressor(epsilon=5.09, alpha=0.0004)
ridge_reg = linear_model.Ridge(solver='saga', max_iter=4000, alpha=0.582)

# `normalize=False` dropped: False was already the default, and the argument
# no longer exists in scikit-learn >= 1.2 (passing it raises TypeError).
lasso_reg = linear_model.Lasso(max_iter=4000, alpha=0.0038)

# `max_features=None` (use every feature) is what 'auto' meant for a
# regression tree; the 'auto' spelling was removed in scikit-learn 1.3.
dt_reg = tree.DecisionTreeRegressor(
    min_samples_split=7,
    min_samples_leaf=7,
    min_weight_fraction_leaf=0.000516,
    max_features=None,
)

pa_reg = PassiveAggressiveRegressor(max_iter=3600, tol=1e-3)

# "reg:linear" is the deprecated name of "reg:squarederror". The scikit-learn
# wrapper's own parameter names (learning_rate, reg_alpha) replace the native
# aliases eta / alpha that were previously forwarded through **kwargs.
# verbosity=0 keeps XGBoost from printing during the repeated fits.
xgb_reg = xgb.XGBRegressor(
    objective="reg:squarederror",
    reg_alpha=0.00244,
    booster='dart',
    learning_rate=0.017326,
    gamma=0.19504,
    reg_lambda=0.22451,
    max_depth=8,
    verbosity=0,
)

# Every estimator here serves both as a base regressor and, in turn, as a
# candidate meta-regressor in the stacking loop.
models = [huber_reg, ridge_reg, lasso_reg, dt_reg, xgb_reg, pa_reg]

In [49]:
# Metrics requested from cross_validate. The two error metrics come back
# negated (the "neg_" prefix), so their means are flipped back when printed.
scoring = ['r2', 'neg_mean_squared_error', 'neg_mean_absolute_error']

# Try each estimator in `models` as the meta-regressor on top of the full
# list of base regressors, scoring every stack with 5-fold cross-validation.
for model in models:
    print("Metaregressor:", model)
    stregr = StackingRegressor(meta_regressor=model, regressors=models)
    scores = cross_validate(stregr, X, y, cv=5, scoring=scoring)

    r2_folds = scores['test_r2']
    mse_folds = scores['test_neg_mean_squared_error']
    mae_folds = scores['test_neg_mean_absolute_error']

    # Each line reports the mean over the folds and twice the fold std.
    print("95% confindence interval:")
    print("r2_score: %0.5f (+/- %0.5f)" % (r2_folds.mean(), 2 * r2_folds.std()))
    print("MSE: %0.5f (+/- %0.5f)" % (-mse_folds.mean(), 2 * mse_folds.std()))
    print("MAE: %0.5f (+/- %0.5f)" % (-mae_folds.mean(), 2 * mae_folds.std()))
    print()
    print("----------------------------")

Metaregressor: HuberRegressor(alpha=0.0004, epsilon=5.09, fit_intercept=True, max_iter=100,
               tol=1e-05, warm_start=False)
95% confindence interval:
r2_score: 0.99952 (+/- 0.00003)
MSE: 0.07785 (+/- 0.00560)
MAE: 0.21592 (+/- 0.00473)

----------------------------
Metaregressor: Ridge(alpha=0.582, copy_X=True, fit_intercept=True, max_iter=4000,
      normalize=False, random_state=None, solver='saga', tol=0.001)
95% confindence interval:
r2_score: 0.99951 (+/- 0.00003)
MSE: 0.07888 (+/- 0.00543)
MAE: 0.21718 (+/- 0.00451)

----------------------------
Metaregressor: Lasso(alpha=0.0038, copy_X=True, fit_intercept=True, max_iter=4000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)
95% confindence interval:
r2_score: 0.99931 (+/- 0.00009)
MSE: 0.11186 (+/- 0.01135)
MAE: 0.25714 (+/- 0.00729)

----------------------------
Metaregressor: DecisionTreeRegressor(criterion='mse', max_depth=None, max

In [None]:
#### As we can see, the best r2_score (0.99952) is achieved when the Huber regressor is used as the meta-model.