In [1]:
%load_ext watermark

In [2]:
%watermark

Last updated: 2023-09-07T07:33:19.114360-07:00

Python implementation: CPython
Python version       : 3.10.12
IPython version      : 8.14.0

Compiler    : GCC 11.4.0
OS          : Linux
Release     : 5.15.0-1029-nvidia
Machine     : x86_64
Processor   : x86_64
CPU cores   : 256
Architecture: 64bit



In [3]:
%watermark --gpu

GPU Info: 
  GPU 0: NVIDIA A100-SXM4-80GB
  GPU 1: NVIDIA A100-SXM4-80GB
  GPU 2: NVIDIA A100-SXM4-80GB
  GPU 3: NVIDIA A100-SXM4-80GB
  GPU 4: NVIDIA A100-SXM4-80GB
  GPU 5: NVIDIA A100-SXM4-80GB
  GPU 6: NVIDIA A100-SXM4-80GB
  GPU 7: NVIDIA A100-SXM4-80GB



In [4]:
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from dask import dataframe as dd
from dask.delayed import delayed
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score

import optuna
import gc
import logging

In [5]:
%watermark --iversions

logging: 0.5.1.2
numpy  : 1.24.4
pandas : 1.5.3
xgboost: 1.7.6
optuna : 3.3.0
dask   : 2023.3.2



In [6]:
!nvidia-smi -L

GPU 0: NVIDIA A100-SXM4-80GB (UUID: GPU-cf1d9297-d9c3-6b2a-8afa-e63e4b5abb2d)
GPU 1: NVIDIA A100-SXM4-80GB (UUID: GPU-6d18acdc-2136-40ac-5eeb-724e81476385)
GPU 2: NVIDIA A100-SXM4-80GB (UUID: GPU-a87c7d38-52d2-befc-e2de-3cf90f75d6eb)
GPU 3: NVIDIA A100-SXM4-80GB (UUID: GPU-8e290e7b-dec5-1828-2ec2-a6930ecd4b5b)
GPU 4: NVIDIA A100-SXM4-80GB (UUID: GPU-36f4174f-2697-e22e-3ae5-0d30e814c4f1)
GPU 5: NVIDIA A100-SXM4-80GB (UUID: GPU-d0929bb4-8c8c-8418-70d2-f658013f4b33)
GPU 6: NVIDIA A100-SXM4-80GB (UUID: GPU-78f54132-3566-5a8c-b51b-fb3cc695d648)
GPU 7: NVIDIA A100-SXM4-80GB (UUID: GPU-61b9f63d-cb20-77e8-8b22-40f40fbb8715)


In [7]:
cluster = LocalCUDACluster(n_workers=8)
client = Client(cluster)

2023-09-07 07:34:51,401 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize
2023-09-07 07:34:51,401 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize
2023-09-07 07:34:51,405 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize
2023-09-07 07:34:51,405 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize
2023-09-07 07:34:51,405 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize
2023-09-07 07:34:51,406 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize
2023-09-07 07:34:51,411 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize
2023-09-07 07:34:51,411 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize
2023-09-07 07:34:51,412 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize
2023-09-07 07:34:51,412 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize
2023-

In [8]:
!nvidia-smi

Thu Sep  7 07:35:08 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.125.06   Driver Version: 525.125.06   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  On   | 00000000:07:00.0 Off |                    0 |
| N/A   34C    P0    70W / 400W |    417MiB / 81920MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA A100-SXM...  On   | 00000000:0F:00.0 Off |                    0 |
| N/A   31C    P0    68W / 400W |    417MiB / 81920MiB |      0%      Default |
|       

In [9]:
def gini(actual, pred, cmpcol = 0, sortcol = 1):
    assert( len(actual) == len(pred) )
    all = np.asarray(np.c_[ actual, pred, np.arange(len(actual)) ], dtype=float)
    all = all[ np.lexsort((all[:,2], -1*all[:,1])) ]
    totalLosses = all[:,0].sum()
    giniSum = all[:,0].cumsum().sum() / totalLosses
    
    giniSum -= (len(actual) + 1) / 2.
    return giniSum / len(actual)
 
def gini_normalized(a, p):
    return gini(a, p) / gini(a, a)

In [10]:
train_folds = []
val_folds = []
train_ys = []
val_ys = []

for i in range(5):
    print(f'Loading fold {i}')
    train_fold_d = delayed(pd.read_csv)(f'../input/xgtrain_fold_{i}.csv.gz')
    train_fold = dd.from_delayed(train_fold_d)
    
    val_fold_d = delayed(pd.read_csv)(f'../input/xgval_fold_{i}.csv.gz')
    val_fold = dd.from_delayed(val_fold_d)
    
    
    train_y = train_fold['target']
    train_fold = train_fold[train_fold.columns.difference(['target'])]
    
    val_y = val_fold['target']
    val_fold = val_fold[val_fold.columns.difference(['target'])]
    
    train_folds.append(train_fold)
    val_folds.append(val_fold)
    
    train_ys.append(train_y)
    val_ys.append(val_y)

Loading fold 0
Loading fold 1
Loading fold 2
Loading fold 3
Loading fold 4


In [11]:
train = pd.read_csv('../input/train.csv.zip')

target = train['target'].values
target

array([0, 0, 0, ..., 0, 0, 0])

In [12]:
train_oof = np.zeros((target.shape[0],))

num_round = 1000

def objective(trial):
        
    params = {
        'objective': trial.suggest_categorical('objective',['binary:logistic']),
        'tree_method': trial.suggest_categorical('tree_method',['gpu_hist']),  # 'gpu_hist','hist'
        'lambda': trial.suggest_loguniform('lambda',1e-3,10.0),
        'alpha': trial.suggest_loguniform('alpha',1e-3,10.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.3,1.0),
        'subsample': trial.suggest_uniform('subsample', 0.4, 1.0),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.001,0.1),
        #'n_estimators': trial.suggest_categorical('n_estimators', [1000]),
        'max_depth': trial.suggest_int('max_depth', 3, 25),
        #'random_state': trial.suggest_categorical('random_state', [24,48,2020]),
        'min_child_weight': trial.suggest_int('min_child_weight', 1,300),
        'eval_metric': trial.suggest_categorical('eval_metric',['logloss']),

    }

    kf = KFold(5, shuffle=True, random_state=137)

    for i, (train_index, val_index) in enumerate(kf.split(train,target)):
        dtrain = xgb.dask.DaskDMatrix(client, train_folds[i].values, train_ys[i], enable_categorical=True)
        dval = xgb.dask.DaskDMatrix(client, val_folds[i].values, val_ys[i], enable_categorical=True)
        
        output = xgb.dask.train(client, params, dtrain, num_round)
        booster = output['booster']  # booster is the trained model
        booster.set_param({'predictor': 'gpu_predictor'})
        predictions = xgb.dask.predict(client, booster, dval)
        predictions = predictions.compute()
        train_oof[val_index] = predictions
        del dtrain, dval, output
        gc.collect()
        gc.collect()

    gini = gini_normalized(target, train_oof)
    
    return gini

In [13]:
logger = logging.getLogger()
logger.setLevel(logging.INFO)  # Setup the root logger.
logger.addHandler(logging.FileHandler("optuna_xgb_output_0.log", mode="w"))

optuna.logging.enable_propagation()  # Propagate logs to the root logger.
optuna.logging.disable_default_handler()  # Stop showing logs in sys.stderr.

study = optuna.create_study(storage="sqlite:///xgb_optuna_allstate_0.db", study_name="five_fold_optuna_xgb_0", direction='maximize')

In [14]:
%%time
logger.info("Start optimization.")
study.optimize(objective, n_trials=3)

  'lambda': trial.suggest_loguniform('lambda',1e-3,10.0),
  'alpha': trial.suggest_loguniform('alpha',1e-3,10.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.3,1.0),
  'subsample': trial.suggest_uniform('subsample', 0.4, 1.0),
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001,0.1),
[07:44:46] task [xgboost.dask-2]:tcp://127.0.0.1:40123 got new rank 0
[07:44:55] task [xgboost.dask-7]:tcp://127.0.0.1:39891 got new rank 0
[07:45:03] task [xgboost.dask-1]:tcp://127.0.0.1:38477 got new rank 0
[07:45:11] task [xgboost.dask-5]:tcp://127.0.0.1:35333 got new rank 0
[07:45:18] task [xgboost.dask-6]:tcp://127.0.0.1:34545 got new rank 0
  'lambda': trial.suggest_loguniform('lambda',1e-3,10.0),
  'alpha': trial.suggest_loguniform('alpha',1e-3,10.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.3,1.0),
  'subsample': trial.suggest_uniform('subsample', 0.4, 1.0),
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001,0.1),
[07:45

CPU times: user 57.1 s, sys: 5.77 s, total: 1min 2s
Wall time: 1min 57s


In [15]:
df = study.trials_dataframe(attrs=('number', 'value', 'params', 'state'))
df.head()

Unnamed: 0,number,value,params_alpha,params_colsample_bytree,params_eval_metric,params_lambda,params_learning_rate,params_max_depth,params_min_child_weight,params_objective,params_subsample,params_tree_method,state
0,0,0.26058,0.518387,0.588129,logloss,0.80362,0.001024,19,73,binary:logistic,0.734509,gpu_hist,COMPLETE
1,1,0.272333,0.554771,0.38644,logloss,0.515043,0.004684,19,226,binary:logistic,0.740431,gpu_hist,COMPLETE
2,2,0.27331,0.001337,0.321878,logloss,0.224638,0.035482,25,235,binary:logistic,0.647692,gpu_hist,COMPLETE


In [16]:
%%time
study.optimize(objective, n_trials=5)
df = study.trials_dataframe(attrs=('number', 'value', 'params', 'state'))
df.to_csv('optuna_xgb_output_0.csv', index=False)
df.head(8)

  'lambda': trial.suggest_loguniform('lambda',1e-3,10.0),
  'alpha': trial.suggest_loguniform('alpha',1e-3,10.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.3,1.0),
  'subsample': trial.suggest_uniform('subsample', 0.4, 1.0),
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001,0.1),
[07:47:32] task [xgboost.dask-3]:tcp://127.0.0.1:42235 got new rank 0
[07:48:25] task [xgboost.dask-2]:tcp://127.0.0.1:40123 got new rank 0
[07:49:20] task [xgboost.dask-7]:tcp://127.0.0.1:39891 got new rank 0
[07:50:16] task [xgboost.dask-1]:tcp://127.0.0.1:38477 got new rank 0
[07:51:20] task [xgboost.dask-5]:tcp://127.0.0.1:35333 got new rank 0
  'lambda': trial.suggest_loguniform('lambda',1e-3,10.0),
  'alpha': trial.suggest_loguniform('alpha',1e-3,10.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.3,1.0),
  'subsample': trial.suggest_uniform('subsample', 0.4, 1.0),
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001,0.1),
[07:52

CPU times: user 35.4 s, sys: 31 s, total: 1min 6s
Wall time: 9min 8s


Unnamed: 0,number,value,params_alpha,params_colsample_bytree,params_eval_metric,params_lambda,params_learning_rate,params_max_depth,params_min_child_weight,params_objective,params_subsample,params_tree_method,state
0,0,0.26058,0.518387,0.588129,logloss,0.80362,0.001024,19,73,binary:logistic,0.734509,gpu_hist,COMPLETE
1,1,0.272333,0.554771,0.38644,logloss,0.515043,0.004684,19,226,binary:logistic,0.740431,gpu_hist,COMPLETE
2,2,0.27331,0.001337,0.321878,logloss,0.224638,0.035482,25,235,binary:logistic,0.647692,gpu_hist,COMPLETE
3,3,0.213742,0.475032,0.907356,logloss,0.025882,0.019162,21,5,binary:logistic,0.939027,gpu_hist,COMPLETE
4,4,0.250605,7.983068,0.797204,logloss,1.452732,0.037789,8,12,binary:logistic,0.532211,gpu_hist,COMPLETE
5,5,0.279674,0.039072,0.650555,logloss,3.588531,0.005713,16,70,binary:logistic,0.882057,gpu_hist,COMPLETE
6,6,0.27053,0.040732,0.58967,logloss,0.035789,0.001149,24,155,binary:logistic,0.813833,gpu_hist,COMPLETE
7,7,0.23164,8.520712,0.554478,logloss,2.060507,0.039615,23,10,binary:logistic,0.537622,gpu_hist,COMPLETE


In [17]:
study.best_trial.params

{'objective': 'binary:logistic',
 'tree_method': 'gpu_hist',
 'lambda': 3.58853089923261,
 'alpha': 0.03907190647684065,
 'colsample_bytree': 0.6505547297793564,
 'subsample': 0.8820571272532136,
 'learning_rate': 0.005713381470116442,
 'max_depth': 16,
 'min_child_weight': 70,
 'eval_metric': 'logloss'}

In [19]:
df.value.max()

0.2796737747509847

In [20]:
%%time
study.optimize(objective, n_trials=100)
df = study.trials_dataframe(attrs=('number', 'value', 'params', 'state'))
df.to_csv('optuna_xgb_output_0.csv', index=False)
df.head(20)

  'lambda': trial.suggest_loguniform('lambda',1e-3,10.0),
  'alpha': trial.suggest_loguniform('alpha',1e-3,10.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.3,1.0),
  'subsample': trial.suggest_uniform('subsample', 0.4, 1.0),
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001,0.1),
[07:57:57] task [xgboost.dask-2]:tcp://127.0.0.1:40123 got new rank 0
[07:58:38] task [xgboost.dask-7]:tcp://127.0.0.1:39891 got new rank 0
[07:59:20] task [xgboost.dask-1]:tcp://127.0.0.1:38477 got new rank 0
[08:00:02] task [xgboost.dask-5]:tcp://127.0.0.1:35333 got new rank 0
[08:00:44] task [xgboost.dask-6]:tcp://127.0.0.1:34545 got new rank 0
  'lambda': trial.suggest_loguniform('lambda',1e-3,10.0),
  'alpha': trial.suggest_loguniform('alpha',1e-3,10.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.3,1.0),
  'subsample': trial.suggest_uniform('subsample', 0.4, 1.0),
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001,0.1),
[08:01

CPU times: user 6min 34s, sys: 3min 12s, total: 9min 47s
Wall time: 1h 4min 55s


Unnamed: 0,number,value,params_alpha,params_colsample_bytree,params_eval_metric,params_lambda,params_learning_rate,params_max_depth,params_min_child_weight,params_objective,params_subsample,params_tree_method,state
0,0,0.26058,0.518387,0.588129,logloss,0.80362,0.001024,19,73,binary:logistic,0.734509,gpu_hist,COMPLETE
1,1,0.272333,0.554771,0.38644,logloss,0.515043,0.004684,19,226,binary:logistic,0.740431,gpu_hist,COMPLETE
2,2,0.27331,0.001337,0.321878,logloss,0.224638,0.035482,25,235,binary:logistic,0.647692,gpu_hist,COMPLETE
3,3,0.213742,0.475032,0.907356,logloss,0.025882,0.019162,21,5,binary:logistic,0.939027,gpu_hist,COMPLETE
4,4,0.250605,7.983068,0.797204,logloss,1.452732,0.037789,8,12,binary:logistic,0.532211,gpu_hist,COMPLETE
5,5,0.279674,0.039072,0.650555,logloss,3.588531,0.005713,16,70,binary:logistic,0.882057,gpu_hist,COMPLETE
6,6,0.27053,0.040732,0.58967,logloss,0.035789,0.001149,24,155,binary:logistic,0.813833,gpu_hist,COMPLETE
7,7,0.23164,8.520712,0.554478,logloss,2.060507,0.039615,23,10,binary:logistic,0.537622,gpu_hist,COMPLETE
8,8,0.230918,0.001495,0.590447,logloss,0.322005,0.014818,18,7,binary:logistic,0.822372,gpu_hist,COMPLETE
9,9,0.278363,0.040354,0.997855,logloss,0.38083,0.006666,15,32,binary:logistic,0.484852,gpu_hist,COMPLETE


In [21]:
df.value.max()

0.2834224878819652

In [22]:
study.best_trial.params

{'objective': 'binary:logistic',
 'tree_method': 'gpu_hist',
 'lambda': 4.645511068481069,
 'alpha': 0.6541465638507157,
 'colsample_bytree': 0.917582587241956,
 'subsample': 0.6600696894938839,
 'learning_rate': 0.013000704738565036,
 'max_depth': 7,
 'min_child_weight': 194,
 'eval_metric': 'logloss'}

In [None]:
%%time
study.optimize(objective, n_trials=100)
df = study.trials_dataframe(attrs=('number', 'value', 'params', 'state'))
df.to_csv('optuna_xgb_output_0.csv', index=False)
df.head(20)

  'lambda': trial.suggest_loguniform('lambda',1e-3,10.0),
  'alpha': trial.suggest_loguniform('alpha',1e-3,10.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.3,1.0),
  'subsample': trial.suggest_uniform('subsample', 0.4, 1.0),
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001,0.1),
[09:11:44] task [xgboost.dask-6]:tcp://127.0.0.1:34545 got new rank 0
[09:11:52] task [xgboost.dask-4]:tcp://127.0.0.1:44281 got new rank 0
[09:12:00] task [xgboost.dask-0]:tcp://127.0.0.1:43067 got new rank 0
[09:12:07] task [xgboost.dask-3]:tcp://127.0.0.1:42235 got new rank 0
[09:12:15] task [xgboost.dask-2]:tcp://127.0.0.1:40123 got new rank 0
  'lambda': trial.suggest_loguniform('lambda',1e-3,10.0),
  'alpha': trial.suggest_loguniform('alpha',1e-3,10.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.3,1.0),
  'subsample': trial.suggest_uniform('subsample', 0.4, 1.0),
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.001,0.1),
[09:12