In [1]:
#necessary imports
import optuna
import numpy as np
from joblib import dump, load
from sklearn.preprocessing import RobustScaler
from sklearn.compose import TransformedTargetRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import  XGBRegressor
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import FunctionTransformer
from sklearn.metrics import mean_squared_error,mean_absolute_error
from optuna.samplers import TPESampler

In [2]:
#log transform function
LogTransform = FunctionTransformer(func=np.log, inverse_func=np.exp)

In [3]:
#load pickeled train set for hyperparameter tunning
train_set=load('/content/drive/MyDrive/Datascience_projects/Autotrader_SA/train_hyperparameter_tuning/train__no_outliers.joblib')

In [None]:
# Set the random seed
sampler = TPESampler(seed=88)

# Define the objective function to minimize
def objective(trial):
    # Sample hyperparameters from the search space
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000,step=50),
        'max_depth': trial.suggest_int('max_depth', 1, 20),
        'learning_rate': trial.suggest_float('learning_rate', 0.001,0.1),
        'gamma': trial.suggest_float('gamma', 0, 1),
        'colsample_bytree':trial.suggest_float('colsample_bytree', 0, 1,step=0.1),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 20),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 15),
        'reg_lambda': trial.suggest_float('reg_lambda', 0, 15),
    }
    # Define the model to use
    xgb_model = XGBRegressor(**params, random_state=46, n_jobs=-1)
    # Define the transformed target regressor to use
    model=TransformedTargetRegressor(regressor=xgb_model, transformer=LogTransform)
    # Evaluate the model using cross-validation
    score = cross_val_score(model, train_set['X_train'], train_set['y_train'], cv=5, scoring='neg_mean_absolute_error').mean()
    
    # Return the negative score (since Optuna tries to minimize the objective function)
    return -score

# Run the optimization
xgb_study = optuna.create_study(direction='minimize', sampler=sampler)
xgb_study.optimize(objective, n_trials=500)

# Print best score and parameters
print('\n')
best_score = xgb_study.best_value
best_params = xgb_study.best_params
print("Best score: ", best_score)
print('Best parameters:', best_params)

[I 2023-06-02 13:43:49,631] A new study created in memory with name: no-name-4686ae16-2666-4d62-8a25-a7e0e9de65f6
[I 2023-06-02 13:44:24,803] Trial 0 finished with value: 38786.96491303906 and parameters: {'n_estimators': 700, 'max_depth': 11, 'learning_rate': 0.053305796635071706, 'gamma': 0.8962852040098879, 'colsample_bytree': 0.7000000000000001, 'min_child_weight': 15, 'subsample': 0.8586691910457004, 'reg_alpha': 3.342291932742916, 'reg_lambda': 2.6273178498603453}. Best is trial 0 with value: 38786.96491303906.
[I 2023-06-02 13:44:27,760] Trial 1 finished with value: 410045.4541469341 and parameters: {'n_estimators': 500, 'max_depth': 19, 'learning_rate': 0.001978702817004398, 'gamma': 0.08992219468395357, 'colsample_bytree': 0.9, 'min_child_weight': 10, 'subsample': 0.9384177959743876, 'reg_alpha': 4.610009082977562, 'reg_lambda': 5.819483192421268}. Best is trial 0 with value: 38786.96491303906.
[I 2023-06-02 13:44:37,329] Trial 2 finished with value: 39230.39275198332 and para



Best score:  29890.724777512205
Best parameters: {'n_estimators': 900, 'max_depth': 4, 'learning_rate': 0.09683919591334456, 'gamma': 0.00015286854567638669, 'colsample_bytree': 0.5, 'min_child_weight': 10, 'subsample': 0.8380391485033669, 'reg_alpha': 0.022198038157775324, 'reg_lambda': 7.569387456939101}
