# Ensemble voting
In this notebook we will use the models that tuned their parameters in optuna again, and utilize in one model.

In [1]:
import pandas as pd
import numpy as np
import optuna

from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import BayesianRidge
from sklearn.ensemble import VotingRegressor
from catboost import CatBoostRegressor

from sklearn.model_selection import KFold
from sklearn.metrics import root_mean_squared_error

## Models parameters

In [2]:
# TODO: don't forget device='cuda'
xgb_parameters = {
    'max_leaves': 40,
    'max_depth': 281,
    'learning_rate': 0.08792944959024201,
    'n_estimators': 632,
    'subsample': 0.6900703019224456,
    'colsample_bytree': 0.767812706804894,
    'device': 'cuda',
}

lgbm_parameters = {
    'num_leaves': 66,
    'max_depth': 449,
    'learning_rate': 0.03502441310895256,
    'n_estimators': 820,
    'subsample': 0.592592101509771,
    'colsample_bytree': 0.6862792428425046
}

sgd_parameters = {
    'penalty': None,
    'alpha': 4.223601774273774,
    'l1_ratio': 0.8940821225850496
}

bayes_ridge_parameters = {
    'alpha_1': 0.012972867153230679,
    'alpha_2': 0.03937728242649084,
    'lambda_1': 5.3000736672220825e-06,
    'lambda_2': 0.07761015011781103
}

cat_parameters = {
    'learning_rate': 0.029247253288660423,
    'depth': 9,
    'l2_leaf_reg': 6.715144149188259,
    'random_strength': 0.29075067665837756,
    'bagging_temperature': 0.3097108651010562,
    'task_type': 'GPU',
}

## Data and evaluation

In [3]:
df = pd.read_csv('data/train_pp.csv')
test_df = pd.read_csv('data/test_pp.csv')

X = df.drop(['accident_risk', 'id'], axis=1)
y = df['accident_risk']

In [4]:
def submission_generator(trained_model):
    test_df_preprocessed = test_df.drop('id', axis=1)
    return pd.concat([test_df['id'], pd.Series(trained_model.predict(test_df_preprocessed))], axis=1).rename({0: 'accident_risk'}, axis=1)

## Model prepration

### UTILIZE WEIGHT ALSO

In [5]:
xgb_model = XGBRegressor(**xgb_parameters)
lgbm_model = LGBMRegressor(**lgbm_parameters)
sgd_model = SGDRegressor(**sgd_parameters)
bayes_model = BayesianRidge(**bayes_ridge_parameters)
cat_model = CatBoostRegressor(**cat_parameters)

In [6]:
estimators = [
    ('xgb', xgb_model),
    ('lgbm', lgbm_model),
    ('sgd', sgd_model),
    ('bayes', bayes_model),
    ('cat', cat_model),
]

In [7]:
kf = KFold(5, shuffle=True)

rmses = []

for train_index, test_index in kf.split(X):
    voting_regressor = VotingRegressor(estimators)
    
    X_train = X.iloc[train_index]
    y_train = y.iloc[train_index]
    X_test = X.iloc[test_index]
    y_test = y.iloc[test_index]

    voting_regressor.fit(X_train, y_train)
    y_pred = voting_regressor.predict(X_test)

    rmse = root_mean_squared_error(y_pred, y_test)
    rmses.append(rmse)
    print('---' * 10)
    print(rmse)
    print('---' * 10)

np.mean(rmses)     

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003782 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 428
[LightGBM] [Info] Number of data points in the train set: 414203, number of used features: 17
[LightGBM] [Info] Start training from score 0.352419
0:	learn: 0.1620722	total: 10.8ms	remaining: 10.8s
1:	learn: 0.1580464	total: 14.3ms	remaining: 7.15s
2:	learn: 0.1541423	total: 17.7ms	remaining: 5.9s
3:	learn: 0.1503670	total: 21.1ms	remaining: 5.26s
4:	learn: 0.1467178	total: 24.5ms	remaining: 4.88s
5:	learn: 0.1431824	total: 27.8ms	remaining: 4.61s
6:	learn: 0.1397577	total: 31.2ms	remaining: 4.42s
7:	learn: 0.1364546	total: 34.5ms	remaining: 4.28s
8:	learn: 0.1332653	total: 37.8ms	remaining: 4.16s
9:	learn: 0.1301909	total: 41.2ms	remaining: 4.07s
10:	learn: 0.1272072	total: 44.5ms	remaining: 4s
11:	learn: 0.1243251	total: 47.8ms	r

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


  return func(**kwargs)


------------------------------
0.05859181464742555
------------------------------
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002843 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 428
[LightGBM] [Info] Number of data points in the train set: 414203, number of used features: 17
[LightGBM] [Info] Start training from score 0.352285
0:	learn: 0.1622528	total: 3.62ms	remaining: 3.61s
1:	learn: 0.1582151	total: 6.92ms	remaining: 3.45s
2:	learn: 0.1543092	total: 10.2ms	remaining: 3.4s
3:	learn: 0.1505296	total: 13.6ms	remaining: 3.38s
4:	learn: 0.1468731	total: 16.9ms	remaining: 3.37s
5:	learn: 0.1433391	total: 20.2ms	remaining: 3.35s
6:	learn: 0.1399230	total: 23.5ms	remaining: 3.34s
7:	learn: 0.1366175	total: 26.8ms	remaining: 3.32s
8:	learn: 0.1334342	total: 30.1ms	remaining: 3.31s
9:	learn: 0.1303443	total: 33.4ms	remaining: 3.3s
10:	

np.float64(0.058326995787878345)

In [8]:
voting_regressor = VotingRegressor(estimators)
voting_regressor.fit(X, y)

submission = submission_generator(voting_regressor)
submission.to_csv('voting_meta_optuna.csv', index=False)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003231 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 432
[LightGBM] [Info] Number of data points in the train set: 517754, number of used features: 17
[LightGBM] [Info] Start training from score 0.352377
0:	learn: 0.1622390	total: 3.89ms	remaining: 3.88s
1:	learn: 0.1581958	total: 7.51ms	remaining: 3.75s
2:	learn: 0.1542864	total: 11.1ms	remaining: 3.7s
3:	learn: 0.1505057	total: 14.8ms	remaining: 3.69s
4:	learn: 0.1468524	total: 18.5ms	remaining: 3.67s
5:	learn: 0.1433061	total: 22ms	remaining: 3.65s
6:	learn: 0.1398870	total: 25.6ms	remaining: 3.63s
7:	learn: 0.1365796	total: 29.2ms	remaining: 3.62s
8:	learn: 0.1333802	total: 32.8ms	remaining: 3.61s
9:	learn: 0.1302913	total: 36.4ms	remaining: 3.6s
10:	learn: 0.1273069	total: 39.9ms	remaining: 3.59s
11:	learn: 0.1244367	total: 43.5ms	r

### Use optuna for weights ? 

In [15]:
kf = KFold(5, shuffle=True)

def objective(trial):
    # a: xgb, b: lgbm, c: sgd, d: bayes, e: cat
    a = trial.suggest_float('a', 0, 1)
    b = trial.suggest_float('b', 0, 1)
    c = trial.suggest_float('c', 0, 1)
    d = trial.suggest_float('d', 0, 1)
    e = trial.suggest_float('e', 0, 1)

    weights = [a, b, c, d, e]

    rmses = []
    
    for train_index, test_index in kf.split(X):
        voting_regressor = VotingRegressor(estimators=estimators, weights=weights)
        
        X_train = X.iloc[train_index]
        y_train = y.iloc[train_index]
        X_test = X.iloc[test_index]
        y_test = y.iloc[test_index]
    
        voting_regressor.fit(X_train, y_train)
        y_pred = voting_regressor.predict(X_test)
    
        rmse = root_mean_squared_error(y_pred, y_test)
        rmses.append(rmse)
    
    return np.mean(rmses)

In [None]:
vote_weight_study = optuna.create_study()
vote_weight_study.optimize(objective, n_trials=2)

[I 2025-10-26 23:33:20,834] A new study created in memory with name: no-name-f90542df-bf80-4cdb-a3cb-6fcf48334210


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002705 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 429
[LightGBM] [Info] Number of data points in the train set: 414203, number of used features: 17
[LightGBM] [Info] Start training from score 0.352220
0:	learn: 0.1622225	total: 4.86ms	remaining: 4.86s
1:	learn: 0.1581941	total: 8.27ms	remaining: 4.12s
2:	learn: 0.1542892	total: 11.8ms	remaining: 3.93s
3:	learn: 0.1505139	total: 15.4ms	remaining: 3.82s
4:	learn: 0.1468513	total: 19ms	remaining: 3.79s
5:	learn: 0.1433315	total: 23.3ms	remaining: 3.86s
6:	learn: 0.1399154	total: 26.8ms	remaining: 3.8s
7:	learn: 0.1366085	total: 30.4ms	remaining: 3.77s
8:	learn: 0.1334101	total: 34.1ms	remaining: 3.75s
9:	learn: 0.1303245	total: 37.4ms	remaining: 3.7s
10:	learn: 0.1273404	total: 41.8ms	remaining: 3.75s
11:	learn: 0.1244551	total: 45.4ms	r