In [1]:
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from dask import dataframe as dd
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import optuna
import gc
xgb.__version__

'1.6.0-dev'

In [2]:
!nvidia-smi

Sat Nov 27 10:17:14 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  On   | 00000000:01:00.0 Off |                    0 |
| N/A   37C    P0    62W / 275W |      0MiB / 81251MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA A100-SXM...  On   | 00000000:47:00.0 Off |                    0 |
| N/A   37C    P0    61W / 275W |      0MiB / 81251MiB |      0%      Default |
|       

In [3]:
train_x = dd.read_csv('../input/xgtrain.csv')
test_x = dd.read_csv('../input/xgval.csv')

In [4]:
train_y = train_x['target']
train_x = train_x[train_x.columns.difference(['target'])]

test_y = test_x['target']
test_x = test_x[test_x.columns.difference(['target'])]

In [5]:
train_x = train_x.replace([np.inf, -np.inf], np.nan)
test_x = test_x.replace([np.inf, -np.inf], np.nan)

In [6]:
cluster = LocalCUDACluster(CUDA_VISIBLE_DEVICES=[0,1,2,4])
client = Client(cluster)

distributed.preloading - INFO - Import preload module: dask_cuda.initialize
distributed.preloading - INFO - Import preload module: dask_cuda.initialize
distributed.preloading - INFO - Import preload module: dask_cuda.initialize
distributed.preloading - INFO - Import preload module: dask_cuda.initialize


In [7]:
!nvidia-smi

Sat Nov 27 10:17:27 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  On   | 00000000:01:00.0 Off |                    0 |
| N/A   37C    P0    65W / 275W |    414MiB / 81251MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA A100-SXM...  On   | 00000000:47:00.0 Off |                    0 |
| N/A   37C    P0    64W / 275W |    414MiB / 81251MiB |      0%      Default |
|       

In [8]:
dtrain = xgb.dask.DaskDMatrix(client, train_x, train_y)

In [9]:
dtest = xgb.dask.DaskDMatrix(client, test_x, test_y)

In [10]:
num_round = 1000

In [11]:
def objective(trial):
        
    params = {
        'objective': trial.suggest_categorical('objective',['binary:logistic']), 
        'tree_method': trial.suggest_categorical('tree_method',['gpu_hist']),  # 'gpu_hist','hist'
        'lambda': trial.suggest_loguniform('lambda',1e-3,10.0),
        'alpha': trial.suggest_loguniform('alpha',1e-3,10.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.3,1.0),
        'subsample': trial.suggest_uniform('subsample', 0.4, 1.0),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.001,0.1),
        #'n_estimators': trial.suggest_categorical('n_estimators', [1000]),
        'max_depth': trial.suggest_categorical('max_depth', [3,5,7,9,11,13,15,17,20]),
        #'random_state': trial.suggest_categorical('random_state', [24,48,2020]),
        'min_child_weight': trial.suggest_int('min_child_weight', 1,300),
        'eval_metric': trial.suggest_categorical('eval_metric',['logloss']),

    }

    output = xgb.dask.train(client, params, dtrain, num_round)
    
    booster = output['booster']  # booster is the trained model
    booster.set_param({'predictor': 'gpu_predictor'})

    predictions = xgb.dask.predict(client, booster, dtest)
    
    predictions = predictions.compute()

    roc = roc_auc_score(test_y, predictions)
    
    return roc

In [12]:
study = optuna.create_study(direction='maximize')

[32m[I 2021-11-27 10:06:44,215][0m A new study created in memory with name: no-name-ad9d6834-4fd1-47d0-8c37-14780d23b331[0m


In [13]:
%%time
study.optimize(objective, n_trials=2)

[10:06:45] task [xgboost.dask]:tcp://10.110.42.38:43079 got new rank 0
[10:06:45] task [xgboost.dask]:tcp://10.110.42.38:33235 got new rank 1
[10:06:45] task [xgboost.dask]:tcp://10.110.42.38:46775 got new rank 2
[10:06:45] task [xgboost.dask]:tcp://10.110.42.38:36273 got new rank 3


KeyboardInterrupt: 

distributed.preloading - INFO - Import preload module: dask_cuda.initialize


In [14]:
%%time
study.optimize(objective, n_trials=50)

[07:37:20] task [xgboost.dask]:tcp://10.110.42.38:46265 got new rank 0
[07:37:20] task [xgboost.dask]:tcp://10.110.42.38:41627 got new rank 1
[07:37:20] task [xgboost.dask]:tcp://10.110.42.38:35835 got new rank 2
[32m[I 2021-11-16 07:37:46,818][0m Trial 2 finished with value: 0.7940241868536972 and parameters: {'objective': 'binary:logistic', 'tree_method': 'gpu_hist', 'lambda': 9.061237253240161, 'alpha': 0.0019204622857863646, 'colsample_bytree': 0.7726912957479601, 'subsample': 0.4236092264480867, 'learning_rate': 0.034919216409885316, 'max_depth': 3, 'min_child_weight': 119, 'eval_metric': 'logloss'}. Best is trial 0 with value: 0.7966626105402218.[0m
[07:37:46] task [xgboost.dask]:tcp://10.110.42.38:41627 got new rank 0
[07:37:46] task [xgboost.dask]:tcp://10.110.42.38:46265 got new rank 1
[07:37:46] task [xgboost.dask]:tcp://10.110.42.38:35835 got new rank 2
[32m[I 2021-11-16 07:38:24,718][0m Trial 3 finished with value: 0.7678394379532528 and parameters: {'objective': 'bina

CPU times: user 1min 40s, sys: 39.5 s, total: 2min 19s
Wall time: 48min 18s


In [16]:
study.best_trial.params

{'objective': 'binary:logistic',
 'tree_method': 'gpu_hist',
 'lambda': 0.009133821786890468,
 'alpha': 1.078618704061085,
 'colsample_bytree': 0.468630629390712,
 'subsample': 0.8630949652622542,
 'learning_rate': 0.049197813915787224,
 'max_depth': 5,
 'min_child_weight': 144,
 'eval_metric': 'logloss'}

In [17]:
%%time
study.optimize(objective, n_trials=100)

[08:27:40] task [xgboost.dask]:tcp://10.110.42.38:35835 got new rank 0
[08:27:40] task [xgboost.dask]:tcp://10.110.42.38:46265 got new rank 1
[08:27:40] task [xgboost.dask]:tcp://10.110.42.38:41627 got new rank 2
[32m[I 2021-11-16 08:28:09,187][0m Trial 52 finished with value: 0.7956698124980207 and parameters: {'objective': 'binary:logistic', 'tree_method': 'gpu_hist', 'lambda': 0.0093796321322138, 'alpha': 0.3903499023742368, 'colsample_bytree': 0.44183623311452663, 'subsample': 0.7790076280176044, 'learning_rate': 0.06970589670538704, 'max_depth': 5, 'min_child_weight': 208, 'eval_metric': 'logloss'}. Best is trial 45 with value: 0.7976124053061373.[0m
[08:28:09] task [xgboost.dask]:tcp://10.110.42.38:46265 got new rank 0
[08:28:09] task [xgboost.dask]:tcp://10.110.42.38:35835 got new rank 1
[08:28:09] task [xgboost.dask]:tcp://10.110.42.38:41627 got new rank 2
[32m[I 2021-11-16 08:28:38,120][0m Trial 53 finished with value: 0.7949806183344965 and parameters: {'objective': 'bin

CPU times: user 3min 27s, sys: 1min 2s, total: 4min 30s
Wall time: 1h 30min 3s


In [18]:
%%time
study.optimize(objective, n_trials=300)

[09:57:44] task [xgboost.dask]:tcp://10.110.42.38:46265 got new rank 0
[09:57:44] task [xgboost.dask]:tcp://10.110.42.38:35835 got new rank 1
[09:57:44] task [xgboost.dask]:tcp://10.110.42.38:41627 got new rank 2
[32m[I 2021-11-16 09:58:36,754][0m Trial 152 finished with value: 0.7971659375008125 and parameters: {'objective': 'binary:logistic', 'tree_method': 'gpu_hist', 'lambda': 0.002982719926217807, 'alpha': 0.1361807901664796, 'colsample_bytree': 0.39154055429073986, 'subsample': 0.854827108956643, 'learning_rate': 0.01757495341663751, 'max_depth': 11, 'min_child_weight': 141, 'eval_metric': 'logloss'}. Best is trial 134 with value: 0.7983349950057206.[0m
[09:58:36] task [xgboost.dask]:tcp://10.110.42.38:41627 got new rank 0
[09:58:36] task [xgboost.dask]:tcp://10.110.42.38:46265 got new rank 1
[09:58:36] task [xgboost.dask]:tcp://10.110.42.38:35835 got new rank 2
[32m[I 2021-11-16 09:59:26,372][0m Trial 153 finished with value: 0.7968914489580162 and parameters: {'objective':

CPU times: user 10min 12s, sys: 3min 1s, total: 13min 14s
Wall time: 4h 20min 12s


In [20]:
study.best_trial.params

{'objective': 'binary:logistic',
 'tree_method': 'gpu_hist',
 'lambda': 0.8554497540535646,
 'alpha': 0.02036030443398669,
 'colsample_bytree': 0.3121838929659706,
 'subsample': 0.8486386194504626,
 'learning_rate': 0.02526480608674796,
 'max_depth': 11,
 'min_child_weight': 133,
 'eval_metric': 'logloss'}