In [1]:
import os
#os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
#os.environ["CUDA_VISIBLE_DEVICES"]="0, 1, 2, 4"

In [2]:
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from dask import dataframe as dd
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import roc_auc_score
import optuna
import gc
xgb.__version__

'1.6.2'

In [3]:
!nvidia-smi

Wed Nov 23 11:01:32 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.142.00   Driver Version: 450.142.00   CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-DGXS...  On   | 00000000:07:00.0  On |                    0 |
| N/A   39C    P0    37W / 300W |    357MiB / 32499MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-DGXS...  On   | 00000000:08:00.0 Off |                    0 |
| N/A   40C    P0    36W / 300W |      1MiB / 32508MiB |      0%      Default |
|       

In [4]:
!nvidia-smi -L

GPU 0: Tesla V100-DGXS-32GB (UUID: GPU-d752af09-1f62-bf3b-4f70-78b84e9e41f6)
GPU 1: Tesla V100-DGXS-32GB (UUID: GPU-054a4a35-f98a-3ebc-9100-0f697c246b43)
GPU 2: Tesla V100-DGXS-32GB (UUID: GPU-454525d4-bebd-7fb8-0ba3-3b85e2f99abd)
GPU 3: Tesla V100-DGXS-32GB (UUID: GPU-af0fb74e-f5eb-0833-17ff-494cc6bdcee1)


In [5]:
# Load the training CSV and the validation CSV as Dask DataFrames.
# Note that `test_x` is read from xgval.csv, i.e. it is the split the
# hyper-parameter search is scored on.
train_x, test_x = (
    dd.read_csv(csv_path)
    for csv_path in (
        '../input/higgs_small_roc/xgtrain_25_1.csv',
        '../input/higgs_small_roc/xgval.csv',
    )
)

In [6]:
# Peel the 'target' label column off each frame and keep every other column
# as features. The right-hand sides are evaluated before rebinding, so both
# expressions still see the frame that contains 'target'.
train_y, train_x = (
    train_x['target'],
    train_x[train_x.columns.difference(['target'])],
)

test_y, test_x = (
    test_x['target'],
    test_x[test_x.columns.difference(['target'])],
)

In [7]:
# Start a local dask-cuda cluster with 4 workers and connect a client to it.
# `client` is the handle passed to every xgb.dask call in this notebook.
# NOTE(review): 4 matches the four GPUs listed by `nvidia-smi -L` above;
# presumably one worker per GPU — adjust n_workers on other machines.
cluster = LocalCUDACluster(n_workers=4)
client = Client(cluster)

In [8]:
# Dask-backed training matrix (features + labels); built once and reused by
# every xgb.dask.train call below.
dtrain = xgb.dask.DaskDMatrix(client, train_x, train_y)

In [9]:
# Dask-backed matrix for the xgval.csv split; the Optuna objective predicts on
# this matrix to score each trial.
dtest = xgb.dask.DaskDMatrix(client, test_x, test_y)


In [10]:
# Number of boosting rounds passed to every xgb.dask.train call in this
# notebook (the search trials and the final retraining runs alike).
num_round = 1000

In [11]:
def objective(trial):
    """Optuna objective: train one XGBoost model on the Dask cluster and return its ROC AUC.

    A hyper-parameter configuration is sampled from ``trial``, a booster is
    trained for ``num_round`` rounds on ``dtrain``, and its predictions on
    ``dtest`` are scored against ``test_y``.

    Relies on the notebook globals ``client``, ``dtrain``, ``dtest``,
    ``test_y`` and ``num_round``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyper-parameters.

    Returns
    -------
    float
        ROC AUC of the trained booster on the ``dtest`` data (higher is better).
    """
    params = {
        # Single-choice categoricals pin fixed settings while still recording
        # them in the trial's params, so the best trial's params can be fed
        # straight back into xgb.dask.train.
        'objective': trial.suggest_categorical('objective', ['binary:logistic']),
        'tree_method': trial.suggest_categorical('tree_method', ['gpu_hist']),
        # suggest_float(..., log=True) replaces the deprecated
        # suggest_loguniform; plain suggest_float replaces suggest_uniform.
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.3, 1.0),
        'subsample': trial.suggest_float('subsample', 0.4, 1.0),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 25),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
        # NOTE(review): no `evals` are passed to train() below, so this metric
        # does not appear to influence the returned score — confirm it is wanted.
        'eval_metric': trial.suggest_categorical('eval_metric', ['logloss']),
    }

    output = xgb.dask.train(client, params, dtrain, num_round)

    booster = output['booster']  # booster is the trained model
    booster.set_param({'predictor': 'gpu_predictor'})

    # predict() returns a lazy Dask collection; compute() pulls it to the client.
    predictions = xgb.dask.predict(client, booster, dtest).compute()

    # Materialise the Dask label series explicitly instead of relying on
    # scikit-learn's implicit array conversion.
    labels = test_y.compute()

    return roc_auc_score(labels, predictions)

In [12]:
# Maximise the objective's ROC AUC. TPESampler is Optuna's default sampler for
# single-objective studies; it is passed explicitly only to fix its seed, so a
# fresh Restart & Run All proposes the same sequence of hyper-parameters.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)

[32m[I 2022-11-23 11:01:51,497][0m A new study created in memory with name: no-name-c88668df-4c1d-467c-8e76-d61368f35b5c[0m


In [13]:
%%time
# Run 2 trials first. NOTE(review): presumably a smoke test of the pipeline
# before launching the long search in the next cell.
study.optimize(objective, n_trials=2)

  client.wait_for_workers(n_workers)
[32m[I 2022-11-23 11:03:11,014][0m Trial 0 finished with value: 0.7967290421387867 and parameters: {'objective': 'binary:logistic', 'tree_method': 'gpu_hist', 'lambda': 1.4302933999940595, 'alpha': 0.36678549574141117, 'colsample_bytree': 0.5241408612068813, 'subsample': 0.5056222280707688, 'learning_rate': 0.0013535646005831616, 'max_depth': 17, 'min_child_weight': 199, 'eval_metric': 'logloss'}. Best is trial 0 with value: 0.7967290421387867.[0m
  client.wait_for_workers(n_workers)
[32m[I 2022-11-23 11:06:38,348][0m Trial 1 finished with value: 0.8104304353378587 and parameters: {'objective': 'binary:logistic', 'tree_method': 'gpu_hist', 'lambda': 0.0024972685915398253, 'alpha': 3.214886070218261, 'colsample_bytree': 0.7403875154996924, 'subsample': 0.7212559329638162, 'learning_rate': 0.012810977366741276, 'max_depth': 17, 'min_child_weight': 25, 'eval_metric': 'logloss'}. Best is trial 1 with value: 0.8104304353378587.[0m


CPU times: user 21.4 s, sys: 7.75 s, total: 29.1 s
Wall time: 4min 45s


In [14]:
%%time
# Continue the same study for 250 more trials (numbering resumes at Trial 2).
# This is the expensive cell: the recorded run took over 14 hours of wall time.
study.optimize(objective, n_trials=250)

  client.wait_for_workers(n_workers)
[32m[I 2022-11-23 11:08:31,273][0m Trial 2 finished with value: 0.8042090635425153 and parameters: {'objective': 'binary:logistic', 'tree_method': 'gpu_hist', 'lambda': 0.0018430598756334455, 'alpha': 0.5893734065103096, 'colsample_bytree': 0.841668298873077, 'subsample': 0.5185479948628189, 'learning_rate': 0.002722620736173878, 'max_depth': 22, 'min_child_weight': 225, 'eval_metric': 'logloss'}. Best is trial 1 with value: 0.8104304353378587.[0m
  client.wait_for_workers(n_workers)
[32m[I 2022-11-23 11:09:07,546][0m Trial 3 finished with value: 0.8077696526746703 and parameters: {'objective': 'binary:logistic', 'tree_method': 'gpu_hist', 'lambda': 0.41859399764712163, 'alpha': 0.05469976389292323, 'colsample_bytree': 0.7507519379701724, 'subsample': 0.7774787451386951, 'learning_rate': 0.06204796092753033, 'max_depth': 18, 'min_child_weight': 285, 'eval_metric': 'logloss'}. Best is trial 1 with value: 0.8104304353378587.[0m
  client.wait_for

CPU times: user 1h 6min 36s, sys: 16min 5s, total: 1h 22min 42s
Wall time: 14h 43min 19s


In [15]:
# Hyper-parameters of the best-scoring trial.
study.best_params

{'objective': 'binary:logistic',
 'tree_method': 'gpu_hist',
 'lambda': 0.9614672750303118,
 'alpha': 1.5066291720335054,
 'colsample_bytree': 0.9626922199863466,
 'subsample': 0.519587445251974,
 'learning_rate': 0.0139227853701842,
 'max_depth': 25,
 'min_child_weight': 238,
 'eval_metric': 'logloss'}

In [16]:
# Load xgtest.csv as a Dask DataFrame. This split is not used by the
# hyper-parameter search above; it is only scored after the search finishes.
real_test_x = dd.read_csv('../input/higgs_small_roc/xgtest.csv')

In [17]:
# Peel the 'target' label column off the test frame and keep every other
# column as features. The right-hand side is evaluated before rebinding.
real_test_y, real_test_x = (
    real_test_x['target'],
    real_test_x[real_test_x.columns.difference(['target'])],
)

In [18]:
# Dask-backed matrix for the xgtest.csv split, used for the final scoring runs.
drealtest = xgb.dask.DaskDMatrix(client, real_test_x, real_test_y)

In [19]:
%%time
# Retrain once with the best hyper-parameters from the study, then score the
# resulting booster on the xgtest.csv data.
params = study.best_params

output = xgb.dask.train(client, params, dtrain, num_round)
booster = output['booster']  # the trained model
booster.set_param({'predictor': 'gpu_predictor'})

# predict() returns a lazy Dask collection; compute() pulls it to the client.
predictions = xgb.dask.predict(client, booster, drealtest).compute()

# Last expression of the cell, so the ROC AUC is displayed as the cell output.
roc_auc_score(real_test_y, predictions)

  client.wait_for_workers(n_workers)


CPU times: user 31 s, sys: 7.05 s, total: 38 s
Wall time: 6min 34s


0.8070034440592782

In [20]:
%%time 
# Retrain the best configuration under 15 different seeds to see how much the
# test ROC AUC varies from run to run.
scores = []
best_params = study.best_trial.params  # looked up once; identical for every seed

for jj in range(15):
    print(jj)
    # Fresh dict per run: the best hyper-parameters plus this run's seed.
    params = dict(best_params, seed=5*jj**3 + 137)

    output = xgb.dask.train(client, params, dtrain, num_round)
    booster = output['booster']  # the trained model
    booster.set_param({'predictor': 'gpu_predictor'})

    # predict() returns a lazy Dask collection; compute() pulls it to the client.
    predictions = xgb.dask.predict(client, booster, drealtest).compute()

    score = roc_auc_score(real_test_y, predictions)
    print(score)
    scores.append(score)

0


  client.wait_for_workers(n_workers)


0.8073562559858825
1


  client.wait_for_workers(n_workers)


0.8077819199019456
2


  client.wait_for_workers(n_workers)


0.807922045971056
3


  client.wait_for_workers(n_workers)


0.8073396787927504
4


  client.wait_for_workers(n_workers)


0.8065484243709262
5


  client.wait_for_workers(n_workers)


0.8075068595506836
6


  client.wait_for_workers(n_workers)


0.8073579570135694
7


  client.wait_for_workers(n_workers)


0.8073540122990562
8


  client.wait_for_workers(n_workers)


0.8071119654513973
9


  client.wait_for_workers(n_workers)


0.8073857891659668
10


  client.wait_for_workers(n_workers)


0.8072857676944056
11


  client.wait_for_workers(n_workers)


0.8071454485638708
12


  client.wait_for_workers(n_workers)


0.8077363782772502
13


  client.wait_for_workers(n_workers)


0.8072166464865299
14


  client.wait_for_workers(n_workers)


0.8072339020035861
CPU times: user 7min 33s, sys: 1min 48s, total: 9min 21s
Wall time: 1h 38min 57s


In [21]:
import numpy as np
# Average test ROC AUC over the seeded retraining runs.
np.asarray(scores).mean()

0.8073522034352585

In [22]:
scores

[0.8073562559858825,
 0.8077819199019456,
 0.807922045971056,
 0.8073396787927504,
 0.8065484243709262,
 0.8075068595506836,
 0.8073579570135694,
 0.8073540122990562,
 0.8071119654513973,
 0.8073857891659668,
 0.8072857676944056,
 0.8071454485638708,
 0.8077363782772502,
 0.8072166464865299,
 0.8072339020035861]

In [25]:
%%time
# Request per-feature contribution predictions (pred_contribs=True) for the
# xgval.csv matrix.
# NOTE(review): `booster` is whatever the most recently executed training cell
# left in the kernel (presumably the last seed of the 15-run loop) — confirm
# that is the model intended for the SHAP analysis.
# The result is not .compute()d in this cell.
shap_val_preds = xgb.dask.predict(client, booster, dtest, pred_contribs=True)

CPU times: user 662 ms, sys: 132 ms, total: 794 ms
Wall time: 8.07 s


In [26]:
%%time
# Request per-feature contribution predictions (pred_contribs=True) for the
# xgtest.csv matrix.
# NOTE(review): `booster` is whatever the most recently executed training cell
# left in the kernel (presumably the last seed of the 15-run loop) — confirm
# that is the model intended for the SHAP analysis.
# The result is not .compute()d in this cell.
shap_test_preds = xgb.dask.predict(client, booster, drealtest, pred_contribs=True)

CPU times: user 732 ms, sys: 163 ms, total: 895 ms
Wall time: 9.59 s
