In [3]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import cross_validate
import tqdm
from math import sqrt
import sys
sys.path.insert(0, '../problem2/')

### Loading data

In [4]:
from common import get_data, get_freq_data, signal_cyclic_shift

In [7]:
X, y = get_data(is_one_signal=True)
X = get_freq_data(X, freq=15)
y = y - np.mean(y)

### Pipelines

In [4]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import HuberRegressor
from sklearn import linear_model
from sklearn import tree
from sklearn import svm
from sklearn.linear_model import PassiveAggressiveRegressor
import xgboost as xgb
from catboost import CatBoostRegressor

### Let's find the best ensemble

In [5]:
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score
from mlxtend.regressor import StackingRegressor
import warnings
warnings.filterwarnings('ignore')

In [6]:
huber_reg = HuberRegressor(epsilon= 5.09, alpha= 0.0004)
ridge_reg = linear_model.Ridge(solver='saga', max_iter=4000, alpha= 0.582)
lasso_reg = linear_model.Lasso(max_iter=4000, alpha=0.0038, normalize=False)
dt_reg = tree.DecisionTreeRegressor(min_samples_split=7, min_samples_leaf=7, min_weight_fraction_leaf=0.000516, 
                                                                                             max_features='auto')
    
pa_reg = PassiveAggressiveRegressor(C = 0.55, max_iter=3600, tol=2.4919865657850927e-06)
xgb_reg = xgb.XGBRegressor(objective="reg:linear", alpha= 0.00244, booster='dart', eta= 0.017326, gamma=0.19504, 
                           reg_lambda=0.22451, max_depth=8, verbosity=0)

models = [huber_reg, ridge_reg, lasso_reg, dt_reg, xgb_reg, pa_reg]

In [7]:
model_names = [type(model).__name__ for model in models]
r2_scores_mean = []
mse_scores_mean = []
mae_scores_mean = []
r2_scores_std = []
mse_scores_std = []
mae_scores_std = []

TIME_SCALE_COEF = 0.2

for model in models:    
    print("Metaregressor:", type(model).__name__)
    stregr = StackingRegressor(regressors=models, meta_regressor=model)
    scoring = ['r2', 'neg_mean_squared_error', 'neg_mean_absolute_error']
    scores = cross_validate(stregr, X, y, scoring=scoring, cv=5)
   
    print("95% confidence interval:")
    print("r2_score: %0.5f (+/- %0.5f)" % (scores['test_r2'].mean(), scores['test_r2'].std() * 2))
    print("MSE[ns^2]: %0.5f (+/- %0.5f)" % (TIME_SCALE_COEF**2 * -scores['test_neg_mean_squared_error'].mean(), TIME_SCALE_COEF * scores['test_neg_mean_squared_error'].std() * 2))
    print("RMSE[ns]: %0.5f (+/- %0.5f)" % (TIME_SCALE_COEF * np.sqrt(-scores['test_neg_mean_squared_error']).mean(), np.sqrt(TIME_SCALE_COEF**2 * -scores['test_neg_mean_squared_error']).std() * 2))
    print("MAE[ns]: %0.5f (+/- %0.5f)" % (TIME_SCALE_COEF * -scores['test_neg_mean_absolute_error'].mean(), TIME_SCALE_COEF * scores['test_neg_mean_absolute_error'].std() * 2))
    print('----------------------------------')

Metaregressor: HuberRegressor
95% confidence interval:
r2_score: 0.99959 (+/- 0.00004)
MSE[ns^2]: 0.00269 (+/- 0.00103)
RMSE[ns]: 0.05182 (+/- 0.00198)
MAE[ns]: 0.04026 (+/- 0.00075)
----------------------------------
Metaregressor: Ridge
95% confidence interval:
r2_score: 0.99958 (+/- 0.00005)
MSE[ns^2]: 0.00272 (+/- 0.00104)
RMSE[ns]: 0.05217 (+/- 0.00199)
MAE[ns]: 0.04052 (+/- 0.00063)
----------------------------------
Metaregressor: Lasso
95% confidence interval:
r2_score: 0.99940 (+/- 0.00008)
MSE[ns^2]: 0.00391 (+/- 0.00203)
RMSE[ns]: 0.06249 (+/- 0.00324)
MAE[ns]: 0.04826 (+/- 0.00121)
----------------------------------
Metaregressor: DecisionTreeRegressor
95% confidence interval:
r2_score: 0.99956 (+/- 0.00004)
MSE[ns^2]: 0.00288 (+/- 0.00107)
RMSE[ns]: 0.05367 (+/- 0.00201)
MAE[ns]: 0.04178 (+/- 0.00093)
----------------------------------
Metaregressor: XGBRegressor
95% confidence interval:
r2_score: 0.99959 (+/- 0.00003)
MSE[ns^2]: 0.00264 (+/- 0.00072)
RMSE[ns]: 0.05139 (+/

#### As we can see the best r2_score 0.99959 achieves if using huber regressor as metamodel