# Ensemble voting
In this notebook we reuse the models whose hyperparameters were tuned with Optuna and combine them into a single voting ensemble.

In [29]:
import pandas as pd
import numpy as np
import optuna

from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import BayesianRidge
from sklearn.ensemble import VotingRegressor
from catboost import CatBoostRegressor

from sklearn.model_selection import KFold
from sklearn.metrics import root_mean_squared_error

## Model parameters

In [31]:
# Tuned hyperparameters, one dict per estimator; each dict is unpacked
# straight into the matching model constructor further down.

# TODO: don't forget device='cuda'
xgb_parameters = {}  # Must be trained again: the earlier tuning used the wrong CV

lgbm_parameters = {
    'num_leaves': 66,
    'max_depth': 449,
    'learning_rate': 0.03502441310895256,
    # Previously misspelled 'n_estimatora': LightGBM does not recognise that
    # key, so the tuned number of trees was never applied.
    'n_estimators': 820,
    'subsample': 0.592592101509771,
    'colsample_bytree': 0.6862792428425046
}

sgd_parameters = {
    'penalty': None,
    'alpha': 4.223601774273774,
    'l1_ratio': 0.8940821225850496
}

bayes_ridge_parameters = {
    'alpha_1': 0.012972867153230679,
    'alpha_2': 0.03937728242649084,
    'lambda_1': 5.3000736672220825e-06,
    'lambda_2': 0.07761015011781103
}

cat_parameters = {}  # Not tuned yet; don't forget task_type='GPU'

## Data and evaluation

In [None]:
# Read the training and test tables from disk.
df = pd.read_csv('data/train_pp.csv')
test_df = pd.read_csv('data/test_pp.csv')

# Target column first, then the feature matrix: everything except the
# target itself and the row identifier.
y = df['accident_risk']
X = df.drop(columns=['accident_risk', 'id'])

In [None]:
def submission_generator(trained_model, test_data=None):
    """Build the submission table from a fitted model.

    Parameters
    ----------
    trained_model : object
        Fitted estimator exposing ``predict``.
    test_data : pandas.DataFrame, optional
        Frame holding an ``id`` column plus the feature columns. When
        omitted, the notebook-level ``test_df`` is used.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``id`` and ``accident_risk``, one row per test row,
        on a fresh 0..n-1 index.
    """
    if test_data is None:
        test_data = test_df

    features = test_data.drop(columns='id')
    predictions = np.asarray(trained_model.predict(features))

    # Pair ids and predictions by position. Concatenating on axis=1 aligns on
    # index labels instead, which misplaces rows (and introduces NaNs) as soon
    # as the test frame does not carry a default RangeIndex.
    return pd.DataFrame({
        'id': test_data['id'].to_numpy(),
        'accident_risk': predictions,
    })

## Model preparation

### TODO: also utilize estimator weights

In [None]:
# One unfitted estimator per parameter dict.
xgb_model = XGBRegressor(**xgb_parameters)
lgbm_model = LGBMRegressor(**lgbm_parameters)
# SGD shuffles the training data between epochs, so pin its seed to make
# repeated runs comparable. A 'random_state' entry in sgd_parameters still
# takes precedence over this default.
sgd_model = SGDRegressor(**{'random_state': 42, **sgd_parameters})
bayes_model = BayesianRidge(**bayes_ridge_parameters)
cat_model = CatBoostRegressor(**cat_parameters)

In [None]:
# (label, estimator) pairs handed to VotingRegressor. The order here is the
# order any per-estimator weights must follow: xgb, lgbm, sgd, bayes, cat.
estimators = list(zip(
    ('xgb', 'lgbm', 'sgd', 'bayes', 'cat'),
    (xgb_model, lgbm_model, sgd_model, bayes_model, cat_model),
))

In [None]:
# 5-fold cross-validation of the unweighted voting ensemble.
# A fixed random_state makes the shuffled folds, and therefore the reported
# RMSE, reproducible across runs.
kf = KFold(5, shuffle=True, random_state=42)

rmses = []

for train_index, test_index in kf.split(X):
    # Build a new ensemble for every fold so nothing fitted on a previous
    # fold is reused.
    voting_regressor = VotingRegressor(estimators)

    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    voting_regressor.fit(X_train, y_train)
    y_pred = voting_regressor.predict(X_test)

    # scikit-learn metrics take (y_true, y_pred); RMSE is symmetric, so the
    # value is the same, but the conventional order avoids surprises if the
    # metric is ever swapped for an asymmetric one.
    rmse = root_mean_squared_error(y_test, y_pred)
    rmses.append(rmse)

# Mean RMSE over the five held-out folds (displayed as the cell output).
np.mean(rmses)

### Use Optuna for weights?

In [None]:
# Fixed seed: every call to kf.split(X) now yields the same folds, so all
# trials are scored on identical splits and their RMSEs are comparable.
kf = KFold(5, shuffle=True, random_state=42)

def objective(trial):
    """Optuna objective: mean cross-validated RMSE of the weighted ensemble.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample one weight in [0, 1] per estimator, under the
        parameter names 'a'..'e'.

    Returns
    -------
    float
        Mean RMSE over the folds produced by the module-level ``kf``.
    """
    # a: xgb, b: lgbm, c: sgd, d: bayes, e: cat
    weights = [trial.suggest_float(name, 0, 1) for name in ('a', 'b', 'c', 'd', 'e')]

    rmses = []

    # NOTE(review): each trial refits every estimator on every fold although
    # only the weights change; caching out-of-fold predictions once would make
    # trials far cheaper.
    for train_index, test_index in kf.split(X):
        # `weights` is keyword-only in VotingRegressor; passing it
        # positionally raises TypeError.
        voting_regressor = VotingRegressor(estimators, weights=weights)

        X_train, y_train = X.iloc[train_index], y.iloc[train_index]
        X_test, y_test = X.iloc[test_index], y.iloc[test_index]

        voting_regressor.fit(X_train, y_train)
        y_pred = voting_regressor.predict(X_test)

        # Conventional (y_true, y_pred) argument order.
        rmses.append(root_mean_squared_error(y_test, y_pred))

    return np.mean(rmses)

In [None]:
# Search for the ensemble weights that give the lowest objective value
# (RMSE). 'minimize' is Optuna's default direction, spelled out for clarity.
vote_weight_study = optuna.create_study(direction='minimize')
vote_weight_study.optimize(func=objective, n_trials=2)