In [1]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import optuna
import gc
xgb.__version__  # last expression of the cell: shows the XGBoost version in use

'1.5.0'

In [2]:
# Read the training and validation tables; both still contain the 'target' column.
train_x = pd.read_csv(filepath_or_buffer='../input/xgtrain.csv')
test_x = pd.read_csv(filepath_or_buffer='../input/xgval.csv')

In [3]:
# Detach the label column from each frame: pop() returns the 'target' series
# and removes it from the frame in one step, leaving only feature columns.
train_y = train_x.pop('target')
test_y = test_x.pop('target')

In [4]:
# Rewrite +/-inf as NaN in place in both feature frames
# (presumably so that XGBoost handles them as missing values -- TODO confirm).
train_x.replace(to_replace=[np.inf, -np.inf], value=np.nan, inplace=True)
test_x.replace(to_replace=[np.inf, -np.inf], value=np.nan, inplace=True)

In [5]:
# Wrap features and labels in XGBoost's DMatrix containers; they are built once
# here and reused by every trial of the hyperparameter search.
dtrain = xgb.DMatrix(data=train_x, label=train_y)
dvalid = xgb.DMatrix(data=test_x, label=test_y)

In [6]:
# Number of boosting rounds used for every model trained during the search.
num_round = 1_000

In [7]:
def objective(trial):
    """Optuna objective: train one XGBoost model and score it by validation ROC AUC.

    Samples a hyperparameter set from ``trial``, trains a booster on the
    module-level ``dtrain`` for ``num_round`` boosting rounds, predicts on
    ``dvalid`` and compares the predictions with ``test_y``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample (and record) the hyperparameters.

    Returns
    -------
    float
        ROC AUC of the model's predictions on the validation set.
    """
    params = {
        # Fixed settings are routed through single-choice categoricals so that
        # they are recorded in ``trial.params`` next to the tuned values.
        'objective': trial.suggest_categorical('objective', ['binary:logistic']),
        'tree_method': trial.suggest_categorical('tree_method', ['gpu_hist']),  # CPU alternative: 'hist'
        # NOTE(review): no ``evals`` are passed to xgb.train below, so this
        # metric is presumably never reported; trials are ranked by ROC AUC.
        'eval_metric': trial.suggest_categorical('eval_metric', ['logloss']),
        # suggest_float replaces the deprecated suggest_uniform /
        # suggest_loguniform; log=True samples on a logarithmic scale.
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.3, 1.0),
        'subsample': trial.suggest_float('subsample', 0.4, 1.0),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        'max_depth': trial.suggest_categorical('max_depth', [3, 5, 7, 9, 11, 13, 15, 17, 20]),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
    }

    model = xgb.train(params, dtrain, num_round)
    predictions = model.predict(dvalid)

    return roc_auc_score(test_y, predictions)

In [8]:
# Seed the sampler so that a fresh "Restart & Run All" proposes the same
# hyperparameters again (given identical objective values). TPESampler is the
# sampler Optuna uses by default, so only the seeding changes.
SAMPLER_SEED = 42
study = optuna.create_study(
    direction='maximize',  # the objective returns ROC AUC: higher is better
    sampler=optuna.samplers.TPESampler(seed=SAMPLER_SEED),
)

[I 2021-11-14 13:12:49,110] A new study created in memory with name: no-name-9ae846cd-c193-4869-940c-d507f2353a15


In [9]:
%%time
study.optimize(objective, n_trials=2)

[I 2021-11-14 13:13:44,723] Trial 0 finished with value: 0.7599902989254044 and parameters: {'objective': 'binary:logistic', 'tree_method': 'gpu_hist', 'lambda': 1.2767801092284488, 'alpha': 0.010838285032591802, 'colsample_bytree': 0.6215133356593129, 'subsample': 0.9163982927024152, 'learning_rate': 0.0013327977499660696, 'max_depth': 5, 'min_child_weight': 195, 'eval_metric': 'logloss'}. Best is trial 0 with value: 0.7599902989254044.
[I 2021-11-14 13:16:57,164] Trial 1 finished with value: 0.7730942252850145 and parameters: {'objective': 'binary:logistic', 'tree_method': 'gpu_hist', 'lambda': 0.5413020892709048, 'alpha': 0.06757242655647304, 'colsample_bytree': 0.9478087079374942, 'subsample': 0.701343831900148, 'learning_rate': 0.0015111027192229393, 'max_depth': 15, 'min_child_weight': 132, 'eval_metric': 'logloss'}. Best is trial 1 with value: 0.7730942252850145.


CPU times: user 4min 14s, sys: 225 ms, total: 4min 14s
Wall time: 4min 3s


In [None]:
%%time
study.optimize(objective, n_trials=50)

[I 2021-11-14 13:17:28,265] Trial 2 finished with value: 0.7447187548682155 and parameters: {'objective': 'binary:logistic', 'tree_method': 'gpu_hist', 'lambda': 0.031884295075786204, 'alpha': 0.019275246876602856, 'colsample_bytree': 0.35920920781450877, 'subsample': 0.4096600473494486, 'learning_rate': 0.00157749203914937, 'max_depth': 3, 'min_child_weight': 98, 'eval_metric': 'logloss'}. Best is trial 1 with value: 0.7730942252850145.
[I 2021-11-14 13:18:54,672] Trial 3 finished with value: 0.7867325176265696 and parameters: {'objective': 'binary:logistic', 'tree_method': 'gpu_hist', 'lambda': 0.04174785621450093, 'alpha': 0.9653706462566634, 'colsample_bytree': 0.4171517448821714, 'subsample': 0.9817759401632996, 'learning_rate': 0.0064583697026000435, 'max_depth': 15, 'min_child_weight': 292, 'eval_metric': 'logloss'}. Best is trial 3 with value: 0.7867325176265696.
[I 2021-11-14 13:19:49,824] Trial 4 finished with value: 0.7950120670296091 and p

In [17]:
# Hyperparameters of the best-scoring trial found so far (shown as cell output).
study.best_params

{'objective': 'binary:logistic',
 'tree_method': 'gpu_hist',
 'lambda': 0.07382375583625363,
 'alpha': 0.0023919654661293415,
 'colsample_bytree': 0.8157006290650177,
 'subsample': 0.8206021006500338,
 'learning_rate': 0.03048344086934949,
 'max_depth': 9,
 'min_child_weight': 269,
 'eval_metric': 'logloss'}