In [1]:
import pandas as pd
import numpy as np

from sklearn.metrics import mean_squared_error

In [2]:
def bagging(preds, cols):
    """Print the RMSE of an equal-weight average of model predictions.

    Every column named in ``cols`` is read from every frame in ``preds``; the
    resulting ``len(preds) * len(cols)`` prediction vectors are averaged row
    by row and scored against the ``online_hours`` column.

    Parameters
    ----------
    preds : sequence of pandas.DataFrame
        Prediction frames. Every frame must contain each column in ``cols``
        and all frames must have the same number of rows. Rows are combined
        by position, not by index label. ``online_hours`` is read from the
        last frame; the frames are assumed to describe the same rows in the
        same order.
    cols : sequence of str
        Names of the prediction columns to include in the average.

    Returns
    -------
    None
        The score is reported on stdout as ``RMSE: <value>`` (5 decimals).

    Raises
    ------
    ValueError
        If ``preds`` or ``cols`` is empty, if the frames have no rows or
        differ in length, or if the target or the averaged prediction
        contains NaN/inf.
    """
    # Fail early with a clear message instead of an IndexError (no frames)
    # or a silent 0/0 division (no columns).
    if len(preds) == 0:
        raise ValueError("bagging() needs at least one prediction frame")
    if len(cols) == 0:
        raise ValueError("bagging() needs at least one prediction column")

    n_rows = len(preds[0])
    if n_rows == 0:
        raise ValueError("bagging() needs prediction frames with at least one row")

    y_predicted = np.zeros(n_rows)
    for df in preds:
        if len(df) != n_rows:
            raise ValueError(
                "all prediction frames must have the same number of rows "
                "(expected %d, got %d)" % (n_rows, len(df))
            )
        for c in cols:
            # Plain arrays keep the accumulation positional regardless of
            # each frame's index labels.
            y_predicted += df[c].to_numpy(dtype=float)
    y_predicted /= len(preds) * len(cols)

    # The target comes from the last frame, named explicitly rather than
    # through the loop variable left over from the accumulation above.
    y_true = preds[-1]['online_hours'].to_numpy(dtype=float)
    if not (np.isfinite(y_true).all() and np.isfinite(y_predicted).all()):
        raise ValueError("online_hours and the averaged predictions must be finite (no NaN/inf)")

    rmse = np.sqrt(np.mean(np.square(y_true - y_predicted)))
    print("RMSE: %.5f"% rmse)

# Bagging Plain 

In [3]:
# Read both versions of the plain prediction files, parsing 'date' as datetimes.
pred_v1, pred_v2 = [
    pd.read_csv(csv_path, parse_dates=['date'])
    for csv_path in ('preds/pred_v1.csv', 'preds/pred_v2.csv')
]

In [4]:
# Score progressively smaller model subsets on the averaged v1/v2 plain predictions.
for model_subset in (
    ['pred_catboost', 'pred_lgbm', 'pred_xgb', 'pred_rf', 'pred_ext', 'pred_ada'],
    ['pred_catboost', 'pred_lgbm', 'pred_xgb', 'pred_rf', 'pred_ext'],
    ['pred_catboost', 'pred_lgbm', 'pred_xgb', 'pred_rf'],
    ['pred_catboost', 'pred_lgbm', 'pred_xgb'],
    ['pred_lgbm', 'pred_xgb'],
):
    bagging([pred_v1, pred_v2], model_subset)

RMSE: 1.81513
RMSE: 1.80633
RMSE: 1.80563
RMSE: 1.80986
RMSE: 1.81209


# Bagging Recursive  

In [5]:
# Read both versions of the recursive prediction files, parsing 'date' as datetimes.
pred_r_v1, pred_r_v2 = [
    pd.read_csv(csv_path, parse_dates=['date'])
    for csv_path in ('preds/pred_recursive_v1.csv', 'preds/pred_recursive_v2.csv')
]

In [6]:
# Score progressively smaller model subsets on the averaged v1/v2 recursive predictions.
for model_subset in (
    ['pred_catboost', 'pred_lgbm', 'pred_xgb', 'pred_rf', 'pred_ext', 'pred_ada'],
    ['pred_catboost', 'pred_lgbm', 'pred_xgb', 'pred_rf', 'pred_ext'],
    ['pred_catboost', 'pred_lgbm', 'pred_xgb', 'pred_rf'],
    ['pred_catboost', 'pred_lgbm', 'pred_xgb'],
    ['pred_lgbm', 'pred_xgb'],
):
    bagging([pred_r_v1, pred_r_v2], model_subset)

RMSE: 1.81493
RMSE: 1.80621
RMSE: 1.80504
RMSE: 1.80729
RMSE: 1.80971


# Bagging Permutation 

In [7]:
# Read both versions of the permutation prediction files, parsing 'date' as datetimes.
pred_perm_v1, pred_perm_v2 = [
    pd.read_csv(csv_path, parse_dates=['date'])
    for csv_path in ('preds/pred_perm_v1.csv', 'preds/pred_perm_v2.csv')
]

In [8]:
# Score progressively smaller model subsets on the averaged v1/v2 permutation predictions.
for model_subset in (
    ['pred_catboost', 'pred_lgbm', 'pred_xgb', 'pred_rf', 'pred_ext', 'pred_ada'],
    ['pred_catboost', 'pred_lgbm', 'pred_xgb', 'pred_rf', 'pred_ext'],
    ['pred_catboost', 'pred_lgbm', 'pred_xgb', 'pred_rf'],
    ['pred_catboost', 'pred_lgbm', 'pred_xgb'],
    ['pred_lgbm', 'pred_xgb'],
):
    bagging([pred_perm_v1, pred_perm_v2], model_subset)

RMSE: 1.81433
RMSE: 1.80572
RMSE: 1.80488
RMSE: 1.80680
RMSE: 1.80923


# Bagging Feature Selection (Permutation + Recursive)

In [9]:
# Pool the permutation and recursive frames, then score each model subset on the pooled average.
source = [pred_perm_v1, pred_perm_v2, pred_r_v1, pred_r_v2]
for model_subset in (
    ['pred_catboost', 'pred_lgbm', 'pred_xgb', 'pred_rf', 'pred_ext', 'pred_ada'],
    ['pred_catboost', 'pred_lgbm', 'pred_xgb', 'pred_rf', 'pred_ext'],
    ['pred_catboost', 'pred_lgbm', 'pred_xgb', 'pred_rf'],
    ['pred_catboost', 'pred_lgbm', 'pred_xgb'],
    ['pred_lgbm', 'pred_xgb'],
):
    bagging(source, model_subset)

RMSE: 1.81398
RMSE: 1.80510
RMSE: 1.80444
RMSE: 1.80695
RMSE: 1.80933


# Bagging All (Permutation + Recursive + Plain)

In [10]:
# Pool all six frames (permutation, recursive, plain), then score each model subset on the pooled average.
source = [pred_perm_v1, pred_perm_v2, pred_r_v1, pred_r_v2, pred_v1, pred_v2]
for model_subset in (
    ['pred_catboost', 'pred_lgbm', 'pred_xgb', 'pred_rf', 'pred_ext', 'pred_ada'],
    ['pred_catboost', 'pred_lgbm', 'pred_xgb', 'pred_rf', 'pred_ext'],
    ['pred_catboost', 'pred_lgbm', 'pred_xgb', 'pred_rf'],
    ['pred_catboost', 'pred_lgbm', 'pred_xgb'],
    ['pred_lgbm', 'pred_xgb'],
):
    bagging(source, model_subset)

RMSE: 1.81302
RMSE: 1.80371
RMSE: 1.80405
RMSE: 1.80761
RMSE: 1.80997
