In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import cross_validate
import tqdm

### Loading data

In [3]:
# Read the three pre-computed NumPy dumps from ./data in one pass:
# X and y are used by the cross-validation further down; X_polynomial is
# presumably a polynomial-feature variant of X (TODO confirm, unused in the visible cells).
X, X_polynomial, y = (
    np.load(npy_path)
    for npy_path in ('./data/X.npy', './data/X_polynomial.npy', './data/y.npy')
)

### Pipelines

In [6]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import HuberRegressor
from sklearn import linear_model
from sklearn import tree
from sklearn import svm
from sklearn.linear_model import PassiveAggressiveRegressor
import xgboost as xgb
from catboost import CatBoostRegressor

In [67]:
# Estimators for the stacked ensemble: five base regressors plus the XGBoost
# model that is later used as the meta-regressor.
# NOTE(review): the hyper-parameter values look like the result of a search
# done elsewhere; their origin is not recorded in this notebook.

# One fixed seed for every estimator that has a stochastic component, so a
# "Restart & Run All" reproduces the same scores.
RANDOM_STATE = 42

huber_reg = HuberRegressor(epsilon=5.09, alpha=0.0004)

# The SAGA solver shuffles the data, hence the seed.
ridge_reg = linear_model.Ridge(solver='saga', max_iter=4000, alpha=0.582, random_state=RANDOM_STATE)

# `normalize=False` was dropped: it was the default, and the parameter no longer
# exists in current scikit-learn releases (passing it raises TypeError).
lasso_reg = linear_model.Lasso(max_iter=4000, alpha=0.0038)

# `max_features='auto'` was dropped: for a regression tree it meant "use all
# features", which is the default (None); the 'auto' spelling has been removed
# from current scikit-learn releases.
dt_reg = tree.DecisionTreeRegressor(
    min_samples_split=7,
    min_samples_leaf=7,
    min_weight_fraction_leaf=0.000516,
    random_state=RANDOM_STATE,
)

# Passive-aggressive training shuffles samples between epochs, hence the seed.
pa_reg = PassiveAggressiveRegressor(max_iter=3600, tol=1e-3, random_state=RANDOM_STATE)

# "reg:linear" is the deprecated name of the squared-error objective; the
# supported spelling is "reg:squarederror". The scikit-learn wrapper names
# `reg_alpha` / `learning_rate` replace the native aliases `alpha` / `eta`, so
# the values cannot clash with the wrapper's own defaults for the same settings.
xgb_reg = xgb.XGBRegressor(
    objective="reg:squarederror",
    booster='dart',
    learning_rate=0.017326,
    gamma=0.19504,
    reg_alpha=0.00244,
    reg_lambda=0.22451,
    max_depth=8,
    random_state=RANDOM_STATE,
)

In [68]:
# Base (level-0) regressors of the stack; xgb_reg is deliberately not listed here.
models = [
    huber_reg,
    ridge_reg,
    lasso_reg,
    dt_reg,
    pa_reg,
]

### Compute metrics

In [70]:
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score

In [71]:
import warnings
warnings.filterwarnings('ignore')

In [77]:
from mlxtend.regressor import StackingRegressor

# Stacked ensemble: the regressors in `models` form the first level and
# xgb_reg is the meta-regressor fitted on top of them.
stregr = StackingRegressor(
    meta_regressor=xgb_reg,
    regressors=models,
)

In [78]:
# 5-fold cross-validation of the stacked regressor, scored with R^2.
# KFold is used without shuffling, so each fold is a contiguous slice of X.
# NOTE(review): if the rows of X are ordered in any meaningful way, consider
# KFold(n_splits=5, shuffle=True, random_state=...) — confirm against the data.
kf = KFold(n_splits=5)

folds_r2_scores = []
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Fit on this fold's training part, then predict its held-out part.
    stregr.fit(X_train, y_train)
    y_pred = stregr.predict(X_test)

    # Score once and reuse the value for both the log line and the average.
    fold_r2 = r2_score(y_test, y_pred)
    print("Fold r2-score:", fold_r2)
    print()
    folds_r2_scores.append(fold_r2)

# Unweighted mean of the per-fold scores.
print("Cross-validation score:", sum(folds_r2_scores) / len(folds_r2_scores))

Fold r2-score: 0.9995103733678145

Fold r2-score: 0.999539343265982

Fold r2-score: 0.9995196410165623

Fold r2-score: 0.999527603347814

Fold r2-score: 0.9995250918797158

Cross-validation score: 0.9995244105755777
