In [1]:
import os
#os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
#os.environ["CUDA_VISIBLE_DEVICES"]="0, 1, 2, 4"

In [2]:
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from dask import dataframe as dd
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import roc_auc_score
import optuna
import gc
xgb.__version__

'1.6.2'

In [3]:
!nvidia-smi

Mon Jan  9 21:37:42 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA TITAN RTX    On   | 00000000:09:00.0 Off |                  N/A |
| 41%   35C    P8    32W / 280W |    905MiB / 24220MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA TITAN RTX    On   | 00000000:41:00.0  On |                  N/A |
| 41%   34C    P8    32W / 280W |    364MiB / 24217MiB |      2%      Default |
|       

In [4]:
!nvidia-smi -L

GPU 0: NVIDIA TITAN RTX (UUID: GPU-0eb32f58-b8d5-17c0-e952-f4ec26a9353f)
GPU 1: NVIDIA TITAN RTX (UUID: GPU-50aeb092-88f5-4e0b-7f73-32741666f319)


In [6]:
train_x = dd.read_csv('../input/higgs_small_roc/xgtrain_aug_25.csv')
test_x = dd.read_csv('../input/higgs_small_roc/xgval_aug.csv')

In [7]:
train_y = train_x['target']
train_x = train_x[train_x.columns.difference(['target'])]

test_y = test_x['target']
test_x = test_x[test_x.columns.difference(['target'])]

In [8]:
cluster = LocalCUDACluster(n_workers=2)
client = Client(cluster)

2023-01-09 21:37:57,122 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize
2023-01-09 21:37:57,123 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize
2023-01-09 21:37:57,146 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize
2023-01-09 21:37:57,146 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize


In [9]:
dtrain = xgb.dask.DaskDMatrix(client, train_x, train_y)

In [10]:
dtest = xgb.dask.DaskDMatrix(client, test_x, test_y)


In [11]:
num_round = 1000

In [12]:
def objective(trial):
        
    params = {
        'objective': trial.suggest_categorical('objective',['binary:logistic']),
        'tree_method': trial.suggest_categorical('tree_method',['gpu_hist']),  # 'gpu_hist','hist'
        'lambda': trial.suggest_loguniform('lambda',1e-3,10.0),
        'alpha': trial.suggest_loguniform('alpha',1e-3,10.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.3,1.0),
        'subsample': trial.suggest_uniform('subsample', 0.4, 1.0),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.001,0.1),
        #'n_estimators': trial.suggest_categorical('n_estimators', [1000]),
        'max_depth': trial.suggest_categorical('max_depth', [3,5,7,9,11,13,15,17,20]),
        #'random_state': trial.suggest_categorical('random_state', [24,48,2020]),
        'min_child_weight': trial.suggest_int('min_child_weight', 1,300),
        'eval_metric': trial.suggest_categorical('eval_metric',['logloss']),

    }

    output = xgb.dask.train(client, params, dtrain, num_round)
    
    booster = output['booster']  # booster is the trained model
    booster.set_param({'predictor': 'gpu_predictor'})

    predictions = xgb.dask.predict(client, booster, dtest)
    
    predictions = predictions.compute()

    roc = roc_auc_score(test_y, predictions)
    
    return roc

In [13]:
study = optuna.create_study(direction='maximize')

[32m[I 2023-01-09 21:38:01,794][0m A new study created in memory with name: no-name-9ca9fc5c-a72c-4c28-bd17-fccb2a68209d[0m


In [14]:
%%time
study.optimize(objective, n_trials=2)

  client.wait_for_workers(n_workers)
[21:38:01] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[21:38:01] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1
[32m[I 2023-01-09 21:38:29,283][0m Trial 0 finished with value: 0.8011809749967193 and parameters: {'objective': 'binary:logistic', 'tree_method': 'gpu_hist', 'lambda': 0.12953960875828102, 'alpha': 9.207937126371373, 'colsample_bytree': 0.9223180647491764, 'subsample': 0.8175238469989368, 'learning_rate': 0.0029891504706391282, 'max_depth': 9, 'min_child_weight': 262, 'eval_metric': 'logloss'}. Best is trial 0 with value: 0.8011809749967193.[0m
  client.wait_for_workers(n_workers)
[21:38:29] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[21:38:29] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1
[32m[I 2023-01-09 21:38:32,602][0m Trial 1 finished with value: 0.7984630158229491 and parameters: {'objective': 'binary:logistic', 'tree_method': 'gpu_hist', 'lambda': 0.009260974084803478, 

CPU times: user 1.2 s, sys: 769 ms, total: 1.97 s
Wall time: 30.8 s


In [15]:
%%time
study.optimize(objective, n_trials=100)

  client.wait_for_workers(n_workers)
[21:38:32] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[21:38:32] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1
[32m[I 2023-01-09 21:38:35,958][0m Trial 2 finished with value: 0.7872497017331438 and parameters: {'objective': 'binary:logistic', 'tree_method': 'gpu_hist', 'lambda': 0.0014685210173758058, 'alpha': 0.1566296155004991, 'colsample_bytree': 0.36267754891812476, 'subsample': 0.4311259329526032, 'learning_rate': 0.015293863730239167, 'max_depth': 3, 'min_child_weight': 255, 'eval_metric': 'logloss'}. Best is trial 0 with value: 0.8011809749967193.[0m
  client.wait_for_workers(n_workers)
[21:38:35] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[21:38:35] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1
[32m[I 2023-01-09 21:38:41,048][0m Trial 3 finished with value: 0.8057171031811566 and parameters: {'objective': 'binary:logistic', 'tree_method': 'gpu_hist', 'lambda': 0.02927491160035201

CPU times: user 2min 24s, sys: 1min 9s, total: 3min 34s
Wall time: 1h 20min 41s


In [16]:
study.best_trial.params

{'objective': 'binary:logistic',
 'tree_method': 'gpu_hist',
 'lambda': 0.016258851543998167,
 'alpha': 1.7642994995727697,
 'colsample_bytree': 0.6586394819694826,
 'subsample': 0.8166184895179339,
 'learning_rate': 0.013460436828624106,
 'max_depth': 20,
 'min_child_weight': 274,
 'eval_metric': 'logloss'}

In [17]:
real_test_x = dd.read_csv('../input/higgs_small_roc/xgtest.csv')

In [18]:
real_test_y = real_test_x['target']
real_test_x = real_test_x[real_test_x.columns.difference(['target'])]

In [19]:
drealtest = xgb.dask.DaskDMatrix(client, real_test_x, real_test_y)

In [20]:
params = study.best_trial.params

output = xgb.dask.train(client, params, dtrain, num_round)
    
booster = output['booster']  # booster is the trained model
booster.set_param({'predictor': 'gpu_predictor'})

predictions = xgb.dask.predict(client, booster, drealtest)
    
predictions = predictions.compute()

roc_auc_score(real_test_y, predictions)

  client.wait_for_workers(n_workers)
[22:59:14] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[22:59:14] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1


0.8058599621129998

In [21]:
%%time 
scores = []

for jj in range(15):
    print(jj)
    params = study.best_trial.params
    params['seed'] = 5*jj**3 + 137

    output = xgb.dask.train(client, params, dtrain, num_round)

    booster = output['booster']  # booster is the trained model
    booster.set_param({'predictor': 'gpu_predictor'})

    predictions = xgb.dask.predict(client, booster, drealtest)

    predictions = predictions.compute()

    scores.append(roc_auc_score(real_test_y, predictions))

0


  client.wait_for_workers(n_workers)
[23:00:32] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[23:00:32] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1


1


  client.wait_for_workers(n_workers)
[23:01:49] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[23:01:49] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1


2


  client.wait_for_workers(n_workers)
[23:03:08] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[23:03:08] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1


3


  client.wait_for_workers(n_workers)
[23:04:26] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[23:04:26] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1


4


  client.wait_for_workers(n_workers)
[23:05:45] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[23:05:45] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1


5


  client.wait_for_workers(n_workers)
[23:07:03] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[23:07:03] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1


6


  client.wait_for_workers(n_workers)
[23:08:21] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[23:08:21] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1


7


  client.wait_for_workers(n_workers)
[23:09:38] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[23:09:38] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1


8


  client.wait_for_workers(n_workers)
[23:10:57] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[23:10:57] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1


9


  client.wait_for_workers(n_workers)
[23:12:15] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[23:12:15] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1


10


  client.wait_for_workers(n_workers)
[23:13:33] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[23:13:33] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1


11


  client.wait_for_workers(n_workers)
[23:14:51] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[23:14:51] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1


12


  client.wait_for_workers(n_workers)
[23:16:09] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[23:16:09] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1


13


  client.wait_for_workers(n_workers)
[23:17:27] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[23:17:27] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1


14


  client.wait_for_workers(n_workers)
[23:18:45] task [xgboost.dask-0]:tcp://127.0.0.1:38053 got new rank 0
[23:18:45] task [xgboost.dask-1]:tcp://127.0.0.1:46637 got new rank 1


CPU times: user 30.9 s, sys: 18.8 s, total: 49.6 s
Wall time: 19min 31s


In [22]:
scores

[0.806064346329237,
 0.806149783836428,
 0.8062990359712568,
 0.8064492064523214,
 0.8060648681168711,
 0.8056599400413831,
 0.8054376167662889,
 0.806007252326325,
 0.8067147650507688,
 0.8070762125627157,
 0.8069742030802689,
 0.8061540259698925,
 0.8056558074833218,
 0.8063920759242751,
 0.8064956507696237]

In [23]:
import numpy as np
np.mean(scores)

0.8062396527120652

2023-01-10 09:34:38,358 - distributed.nanny - ERROR - Worker process died unexpectedly
2023-01-10 09:34:38,359 - distributed.nanny - ERROR - Worker process died unexpectedly
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/queues.py", line 245, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 411, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
