In [1]:
import pandas as pd
import numpy as np

import optuna

from xgboost import XGBClassifier
from sklearn.metrics import log_loss
import joblib

  from .autonotebook import tqdm as notebook_tqdm


#### **Getting training && validation data**

In [2]:
df_train = pd.read_csv('../databases/training.csv')
df_valid = pd.read_csv('../databases/validation.csv')

In [3]:
X_train, y_train = df_train.drop(['IND_BOM_1_1'], axis=1), df_train['IND_BOM_1_1']
X_val, y_val = df_valid.drop(['IND_BOM_1_1'], axis=1), df_valid['IND_BOM_1_1']

In [4]:
X_train = np.array(X_train)
X_val = np.array(X_val)

#### **Parameters selection**

In [5]:
params = {
    'max_depth': (3, 10),
    'learning_rate': (0.001, 0.1),
    'subsample': (0.5, 1.0),
    'colsample_bytree': (0.5, 1.0),
    'gamma': (1, 5),
    'min_child_weight': (1, 10),
    'n_estimators': (50, 200)
}

def objective(trial):
    model = XGBClassifier(
        max_depth=trial.suggest_int('max_depth', *params['max_depth']),
        learning_rate=trial.suggest_float('learning_rate', *params['learning_rate']),
        subsample=trial.suggest_float('subsample', *params['subsample']),
        colsample_bytree=trial.suggest_float('colsample_bytree', *params['colsample_bytree']),
        gamma=trial.suggest_float('gamma', *params['gamma']),
        min_child_weight=trial.suggest_int('min_child_weight', *params['min_child_weight']),
        n_estimators=trial.suggest_int('n_estimators', *params['n_estimators']),
        n_jobs=-1
    )

    model.fit(X_train, y_train, verbose=True)

    y_probas = model.predict_proba(X_val)

    loss = log_loss(y_val, y_probas)

    return loss

In [6]:
n_trials = 20

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=n_trials)

[32m[I 2023-04-29 18:09:08,690][0m A new study created in memory with name: no-name-13798e5b-2c93-48a1-b30c-f1f40ff5a765[0m
[32m[I 2023-04-29 18:09:47,262][0m Trial 0 finished with value: 0.6046263211501588 and parameters: {'max_depth': 8, 'learning_rate': 0.08861571124444137, 'subsample': 0.7814340841967233, 'colsample_bytree': 0.5413943258281566, 'gamma': 3.663043710958109, 'min_child_weight': 4, 'n_estimators': 114}. Best is trial 0 with value: 0.6046263211501588.[0m
[32m[I 2023-04-29 18:10:30,702][0m Trial 1 finished with value: 0.620459642447132 and parameters: {'max_depth': 4, 'learning_rate': 0.05304835308849388, 'subsample': 0.7491451488524594, 'colsample_bytree': 0.9402923339441683, 'gamma': 3.7216453854168594, 'min_child_weight': 8, 'n_estimators': 172}. Best is trial 0 with value: 0.6046263211501588.[0m
[32m[I 2023-04-29 18:11:10,244][0m Trial 2 finished with value: 0.6089623689531368 and parameters: {'max_depth': 8, 'learning_rate': 0.07767826980944188, 'subsampl

#### **Saving study**

In [7]:
save_path = './optuna_studies/xgboost_study.pkl'

joblib.dump(study, save_path)

['./optuna_studies/xgboost_study.pkl']