In [1]:
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from dask import dataframe as dd
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import optuna
import gc
# Display the installed XGBoost version so the notebook records which build produced the results.
xgb.__version__

'1.6.0-dev'

In [2]:
# Snapshot GPU memory/utilization before the Dask CUDA cluster is started.
!nvidia-smi

Fri Dec  3 16:17:16 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA TITAN RTX    On   | 00000000:09:00.0 Off |                  N/A |
| 40%   48C    P8    29W / 280W |      6MiB / 24220MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA TITAN RTX    On   | 00000000:41:00.0  On |                  N/A |
| 40%   51C    P8    34W / 280W |    367MiB / 24217MiB |      5%      Default |
|       

In [3]:
# Open the training and validation CSVs as dask dataframes (same reader for both files).
train_x, test_x = (
    dd.read_csv(csv_path)
    for csv_path in (
        '../../TPS_2021/input/tabular-playground-series-dec-2021/xgtrain.csv',
        '../../TPS_2021/input/tabular-playground-series-dec-2021/xgval.csv',
    )
)

In [4]:
# Separate labels from features. Each right-hand tuple is built entirely from the
# original frame before the names are rebound, so `train_x` / `test_x` still
# contain the 'target' column while the labels are extracted.
# Labels are shifted down by one (presumably 1-based in the CSV -- confirm against the data).
train_x, train_y = (
    train_x[train_x.columns.difference(['target'])],
    train_x['target'] - 1,
)

test_x, test_y = (
    test_x[test_x.columns.difference(['target'])],
    test_x['target'] - 1,
)

In [5]:
# Bring up a local CUDA cluster restricted to GPUs 0 and 1, then connect a client to it.
cluster = LocalCUDACluster(CUDA_VISIBLE_DEVICES=[0, 1])
client = Client(address=cluster)

distributed.preloading - INFO - Import preload module: dask_cuda.initialize
distributed.preloading - INFO - Import preload module: dask_cuda.initialize


In [6]:
# Snapshot GPU state again now that the LocalCUDACluster and client have been created.
!nvidia-smi

Fri Dec  3 16:17:20 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA TITAN RTX    On   | 00000000:09:00.0 Off |                  N/A |
| 40%   49C    P2    69W / 280W |    326MiB / 24220MiB |      1%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA TITAN RTX    On   | 00000000:41:00.0  On |                  N/A |
| 40%   51C    P2    78W / 280W |    537MiB / 24217MiB |      4%      Default |
|       

In [7]:
# Inspect the labels as a dask array; this is a lazy view (shape/bytes are reported as unknown).
train_y.values

Unnamed: 0,Array,Chunk
Bytes,unknown,unknown
Shape,"(nan,)","(nan,)"
Count,76 Tasks,19 Chunks
Type,int64,numpy.ndarray
"Array Chunk Bytes unknown unknown Shape (nan,) (nan,) Count 76 Tasks 19 Chunks Type int64 numpy.ndarray",,

Unnamed: 0,Array,Chunk
Bytes,unknown,unknown
Shape,"(nan,)","(nan,)"
Count,76 Tasks,19 Chunks
Type,int64,numpy.ndarray


In [8]:
# Wrap the training features and labels for distributed XGBoost training.
dtrain = xgb.dask.DaskDMatrix(client, data=train_x, label=train_y)

In [9]:
# Wrap the validation features and labels; used for prediction inside the objective.
dtest = xgb.dask.DaskDMatrix(client, data=test_x, label=test_y)

In [10]:
# Number of boosting rounds used for every trial.
num_round = 1_000

In [23]:
def objective(trial):
    """Optuna objective: train a distributed GPU XGBoost classifier and score it.

    A hyperparameter set is sampled from ``trial``, a booster is trained on
    ``dtrain`` for ``num_round`` rounds through the Dask ``client``, and the
    predicted classes for ``dtest`` are compared against ``test_y``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Classification accuracy on the validation data (higher is better).

    Notes
    -----
    Relies on the notebook-level names ``client``, ``dtrain``, ``dtest``,
    ``test_y`` and ``num_round`` being defined before the study is run.
    """
    params = {
        # Fixed settings are registered as single-choice categoricals so they
        # are stored with the tuned values in the trial's params.
        'objective': trial.suggest_categorical('objective', ['multi:softprob']),
        'num_class': trial.suggest_categorical('num_class', [6]),
        # 'hist' is the CPU alternative to 'gpu_hist'.
        'tree_method': trial.suggest_categorical('tree_method', ['gpu_hist']),
        'eval_metric': trial.suggest_categorical('eval_metric', ['mlogloss']),
        # Tuned settings. suggest_float(..., log=True) replaces the deprecated
        # suggest_loguniform; plain suggest_float replaces suggest_uniform.
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.3, 1.0),
        'subsample': trial.suggest_float('subsample', 0.4, 1.0),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        'max_depth': trial.suggest_categorical('max_depth', [3, 5, 7, 9, 11, 13, 15, 17, 20]),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
    }

    output = xgb.dask.train(client, params, dtrain, num_boost_round=num_round)

    booster = output['booster']  # the trained model
    booster.set_param({'predictor': 'gpu_predictor'})

    # Per-class probabilities (multi:softprob); argmax picks the predicted class.
    class_probabilities = xgb.dask.predict(client, booster, dtest)
    predicted_labels = np.argmax(class_probabilities.compute(), axis=1)

    # test_y is a lazy dask Series: materialize it explicitly rather than
    # relying on scikit-learn to coerce it implicitly.
    true_labels = test_y.compute()

    accuracy = accuracy_score(true_labels, predicted_labels)

    return accuracy

In [28]:
# The study is persisted in the SQLite file, so creating it a second time under
# the same name raises DuplicatedStudyError. load_if_exists=True makes this cell
# re-runnable: an existing study (with its earlier trials) is loaded instead.
study = optuna.create_study(
    direction='maximize',
    storage="sqlite:///xgb_optuna_tests.db",
    study_name="dec_2021_test_0",
    load_if_exists=True,
)

[32m[I 2021-12-03 16:39:02,385][0m A new study created in RDB with name: dec_2021_test_0[0m


In [29]:
%%time
study.optimize(objective, n_trials=2)

[16:39:31] task [xgboost.dask]:tcp://192.168.1.200:46165 got new rank 0
[16:39:31] task [xgboost.dask]:tcp://192.168.1.200:34617 got new rank 1
[32m[I 2021-12-03 16:40:45,308][0m Trial 0 finished with value: 0.9286275 and parameters: {'objective': 'multi:softprob', 'num_class': 6, 'tree_method': 'gpu_hist', 'lambda': 0.0010543052269471405, 'alpha': 0.45547901897758103, 'colsample_bytree': 0.48583325031263813, 'subsample': 0.5898352473849167, 'learning_rate': 0.006099803484563795, 'max_depth': 5, 'min_child_weight': 104, 'eval_metric': 'mlogloss'}. Best is trial 0 with value: 0.9286275.[0m
[16:40:45] task [xgboost.dask]:tcp://192.168.1.200:34617 got new rank 0
[16:40:45] task [xgboost.dask]:tcp://192.168.1.200:46165 got new rank 1
[32m[I 2021-12-03 16:46:26,158][0m Trial 1 finished with value: 0.96010375 and parameters: {'objective': 'multi:softprob', 'num_class': 6, 'tree_method': 'gpu_hist', 'lambda': 0.0017170221867173663, 'alpha': 0.334371331440923, 'colsample_bytree': 0.751790

CPU times: user 24.2 s, sys: 8.7 s, total: 32.9 s
Wall time: 6min 54s


In [33]:
# Tabulate the trials, keeping only the trial number, objective value,
# sampled parameters and final state.
df = study.trials_dataframe(
    attrs=('number', 'value', 'params', 'state'),
)

In [34]:
# Preview the first rows of the trials table.
df.head(n=5)

Unnamed: 0,number,value,params_alpha,params_colsample_bytree,params_eval_metric,params_lambda,params_learning_rate,params_max_depth,params_min_child_weight,params_num_class,params_objective,params_subsample,params_tree_method,state
0,0,0.928628,0.455479,0.485833,mlogloss,0.001054,0.0061,5,104,6,multi:softprob,0.589835,gpu_hist,COMPLETE
1,1,0.960104,0.334371,0.75179,mlogloss,0.001717,0.009002,15,78,6,multi:softprob,0.748352,gpu_hist,COMPLETE


In [35]:
# Persist the trials table next to the notebook, without the positional index column.
df.to_csv(path_or_buf='optuna_xgb_output_0.csv', index=False)

In [36]:
# Hyperparameters of the best-scoring trial so far.
study.best_params

{'alpha': 0.334371331440923,
 'colsample_bytree': 0.7517901383752162,
 'eval_metric': 'mlogloss',
 'lambda': 0.0017170221867173663,
 'learning_rate': 0.009002123522299416,
 'max_depth': 15,
 'min_child_weight': 78,
 'num_class': 6,
 'objective': 'multi:softprob',
 'subsample': 0.7483524229625753,
 'tree_method': 'gpu_hist'}