In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import cross_validate
import tqdm

In [2]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import HuberRegressor
from sklearn import linear_model
from sklearn import tree
from sklearn import svm
from sklearn.linear_model import PassiveAggressiveRegressor
import xgboost as xgb
from catboost import CatBoostRegressor

In [3]:
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score
from mlxtend.regressor import StackingRegressor
import warnings
warnings.filterwarnings('ignore')

### Loading data

In [4]:
def get_data(path='./data/shashlik_61_pulses.txt'):
    """Load the pulse dataset from a whitespace-separated text file.

    Every non-blank line is parsed as one row of floats. Column 0 is returned
    as the target and columns 2 onward as the features. Column 1 is skipped:
    the previous version stored it in a local named ``y_baseline`` but never
    used or returned it.

    Parameters
    ----------
    path : str, optional
        Location of the text file. Defaults to the path that used to be
        hard-coded, so ``get_data()`` keeps working unchanged.

    Returns
    -------
    X : numpy.ndarray
        2-D array holding columns 2 onward of every row.
    y : numpy.ndarray
        1-D array holding column 0 of every row.
    """
    # The context manager closes the file even if parsing raises.
    with open(path, 'r') as file:
        # Skip blank / whitespace-only lines (e.g. a trailing newline at EOF);
        # they would otherwise become empty rows and yield a ragged array.
        rows = [list(map(float, line.split())) for line in file if line.strip()]
    data = np.array(rows)

    X = data[:, 2:]
    y = data[:, 0]

    return X, y

In [5]:
def get_freq_data(X, freq=20, start_point=355):
    """Sub-sample the columns of ``X`` every ``freq`` steps around ``start_point``.

    Two strided column slices are taken and joined along axis 1: first the one
    walking backwards from ``start_point`` towards column 0, then the one
    walking forwards from ``start_point`` to the last column.

    Note that both slices begin at ``start_point``, so when it is a valid
    column index that column appears twice in the result.
    """
    backward = X[:, start_point::-freq]
    forward = X[:, start_point::freq]
    return np.concatenate((backward, forward), axis=1)

In [6]:
# Load the feature matrix X and the target vector y returned by get_data().
X, y = get_data()

### Pipelines

In [7]:
# Shared seed so the stochastic estimators below give repeatable results
# across kernel restarts.
RANDOM_STATE = 42

huber_reg = HuberRegressor(epsilon=5.09, alpha=0.0004)
ridge_reg = linear_model.Ridge(solver='saga', max_iter=4000, alpha=0.582,
                               random_state=RANDOM_STATE)
# `normalize=False` is no longer passed: the parameter was deprecated and then
# removed from scikit-learn, and False was its default, so the fit is unchanged.
lasso_reg = linear_model.Lasso(max_iter=4000, alpha=0.0038)
# `max_features='auto'` is no longer passed: for a regression tree it meant
# "use all features", which is the default, and the 'auto' value was removed
# from scikit-learn.
dt_reg = tree.DecisionTreeRegressor(min_samples_split=7, min_samples_leaf=7,
                                    min_weight_fraction_leaf=0.000516,
                                    random_state=RANDOM_STATE)

pa_reg = PassiveAggressiveRegressor(max_iter=3600, tol=1e-3, random_state=RANDOM_STATE)
# "reg:linear" is the deprecated alias of "reg:squarederror". `alpha` and `eta`
# are spelled with their scikit-learn wrapper names (`reg_alpha`,
# `learning_rate`) to match the `reg_lambda` argument already used here.
xgb_reg = xgb.XGBRegressor(objective="reg:squarederror", reg_alpha=0.00244, booster='dart',
                           learning_rate=0.017326, gamma=0.19504,
                           reg_lambda=0.22451, max_depth=8, random_state=RANDOM_STATE)

In [8]:
# Estimators taking part in the stacking experiment (xgb_reg is not included).
models = [
    huber_reg,
    ridge_reg,
    lasso_reg,
    dt_reg,
    pa_reg,
]

### Let's find the best ensemble

In [11]:
# Parse the data file once: only the column sub-sampling depends on `freq`,
# so re-reading the file on every iteration was wasted work.
X_raw, y = get_data()

# Metrics requested from cross_validate; identical for every model and frequency.
scoring = ['r2', 'neg_mean_squared_error', 'neg_mean_absolute_error']

# (printed label, key in the cross_validate result, sign applied to the mean).
# MSE and MAE come back as negated scores, so their means are flipped back.
report_rows = [
    ("r2_score", 'test_r2', 1),
    ("MSE", 'test_neg_mean_squared_error', -1),
    ("MAE", 'test_neg_mean_absolute_error', -1),
]

for freq in range(20, 70, 10):
    print("Frequency:", freq)
    X = get_freq_data(X_raw, freq=freq)
    for model in models:
        # Each candidate in turn acts as the meta-regressor on top of all of `models`.
        print("Metaregressor:", type(model).__name__)
        stregr = StackingRegressor(regressors=models, meta_regressor=model)
        scores = cross_validate(stregr, X, y, scoring=scoring, cv=5)

        print("95% confindence interval:")
        for label, key, sign in report_rows:
            fold_scores = scores[key]
            # The reported spread is twice the standard deviation over the 5 folds.
            print("%s: %0.5f (+/- %0.5f)" % (label, sign * fold_scores.mean(), fold_scores.std() * 2))
        print()
        print("----------------------------")
    print()
    print('____________________________________________________________')
    print()

Frequency: 20
Metaregressor: HuberRegressor
95% confindence interval:
r2_score: 0.99921 (+/- 0.00010)
MSE: 0.12881 (+/- 0.01257)
MAE: 0.27774 (+/- 0.00850)

----------------------------
Metaregressor: Ridge
95% confindence interval:
r2_score: 0.99921 (+/- 0.00011)
MSE: 0.12770 (+/- 0.01337)
MAE: 0.27652 (+/- 0.00939)

----------------------------
Metaregressor: Lasso
95% confindence interval:
r2_score: 0.99920 (+/- 0.00011)
MSE: 0.13068 (+/- 0.01313)
MAE: 0.27998 (+/- 0.00815)

----------------------------
Metaregressor: DecisionTreeRegressor
95% confindence interval:
r2_score: 0.99914 (+/- 0.00012)
MSE: 0.13952 (+/- 0.01482)
MAE: 0.28963 (+/- 0.01108)

----------------------------
Metaregressor: PassiveAggressiveRegressor
95% confindence interval:
r2_score: 0.99887 (+/- 0.00115)
MSE: 0.18291 (+/- 0.17943)
MAE: 0.33882 (+/- 0.19466)

----------------------------

____________________________________________________________

Frequency: 30
Metaregressor: HuberRegressor
95% confindence in