In [4]:
# Announce this stage, then execute the preprocessing notebook via %run.
# Its recorded output lists xtrain, xtest, ytrain and ytest; the tuning and
# fitting cells below read those names, so this cell has to run before them.
print('------------Training Xgboost------------')
%run ./Preprocessing.ipynb

------------Training Xgboost------------
Preprocessed : xtrain, xtest, ytrain, ytest
(74736, 13) (18684, 13) (74736,) (18684,)


In [1]:
import numpy as np
import optuna
import plotly
import xgboost as xgb
from sklearn.model_selection import (
    StratifiedKFold, KFold
)
from xgboost import XGBClassifier
from xgboost.callback import EvaluationMonitor
from xgboost.callback import EarlyStopping

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Settings that steer the Optuna search: trial budget, CV splitter and the
# wall-clock limit handed to study.optimize as its timeout.
opt_grid = dict(
    early_stopping=100,
    number_of_trials=100,
    shuffle=True,
    cv=KFold(n_splits=10, random_state=None, shuffle=True),
    time_constraint=60 * 1,
)

# Booster parameters merged unchanged into every trial's parameter grid.
static_param = dict(objective='binary:logistic', eval_metric='auc')

In [3]:
def objective(trial):
    '''
    Optuna objective: cross-validate XGBoost with one sampled set of hyperparameters.

    Args:
        trial (optuna.trial.Trial): Trial used to sample the hyperparameters and,
            through the pruning callback, to report intermediate CV scores.

    Returns:
        float: Mean of the per-round ``test-<eval_metric>-mean`` values returned
        by ``xgb.cv`` for the sampled hyperparameters.

    Raises:
        optuna.exceptions.TrialPruned: If the trial reports that it should be pruned.
    '''
    param_grid = {
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.8),
        "max_depth": trial.suggest_int("max_depth", 2, 30),
        "min_child_weight": trial.suggest_float("min_child_weight", 1, 500),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.1, 1),
        # Sampled under its own name. Reusing "colsample_bytree" here made Optuna
        # hand back the bytree value, so this parameter was never tuned on its
        # own and never appeared in study.best_params.
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.1, 1),
        "subsample": trial.suggest_float("subsample", 0.1, 1),
        "reg_alpha": trial.suggest_float("reg_alpha", 0, 5),
        "reg_lambda": trial.suggest_float("reg_lambda", 1, 5),
    }

    # Sampled values first, then the fixed booster settings (objective, eval_metric).
    full_grid = {**param_grid, **static_param}

    # xgb.cv names the held-out metric "test-<metric>"; both the pruning callback
    # and the result lookup below depend on that name.
    metric_key = "test-" + full_grid['eval_metric']

    pruning_callback = optuna.integration.XGBoostPruningCallback(trial, metric_key)
    # NOTE(review): the patience is hard-coded to 5 rounds and
    # opt_grid['early_stopping'] is not used here - confirm which is intended.
    early_callback = xgb.callback.EarlyStopping(5)

    # num_boost_round is not passed, so xgb.cv runs with its library default.
    cv_results = xgb.cv(
        full_grid,
        dtrain=xgb.DMatrix(xtrain, label=ytrain),
        folds=opt_grid['cv'],
        metrics=full_grid['eval_metric'],
        callbacks=[pruning_callback, early_callback],
    )

    # NOTE(review): this averages the metric over all returned boosting rounds
    # rather than taking the final round - confirm that is the intended score.
    scores = np.mean(cv_results[metric_key + '-mean'])

    if trial.should_prune():
        raise optuna.exceptions.TrialPruned()

    return scores

# Only surface Optuna errors; lower-severity log messages are suppressed.
optuna.logging.set_verbosity(optuna.logging.ERROR)

# Maximise the objective with a TPE sampler, bounded by both a trial count
# and a time budget taken from opt_grid.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(),
)
study.optimize(
    objective,
    n_trials=opt_grid['number_of_trials'],
    timeout=opt_grid['time_constraint'],
    show_progress_bar=False,
)

# Best hyperparameters found, kept for configuring the final classifier.
model_parameters = study.best_params

NameError: name 'xtrain' is not defined

In [None]:
# Create the classifier; apart from use_label_encoder it is left unconfigured
# here, and the tuned hyperparameters are applied in the following cell.
model_xgb = XGBClassifier(use_label_encoder= False)

In [None]:
# Apply the best hyperparameters from the Optuna study (study.best_params,
# stored in model_parameters) to the classifier.
model_xgb.set_params(**model_parameters)

In [None]:
# Train the tuned classifier on the training split.
model_xgb = model_xgb.fit(xtrain, ytrain)

# Test split: class predictions and column 1 of predict_proba.
pred_xgb = model_xgb.predict(xtest)
probs_xgb = model_xgb.predict_proba(xtest)[:, 1]

# Training split: the same two outputs.
pred_xgb_train = model_xgb.predict(xtrain)
probs_xgb_train = model_xgb.predict_proba(xtrain)[:, 1]

print('-----------model_xgb, pred_xgb, probs_xgb loaded-----------')