In [1]:
# Load the watermark IPython extension; it provides the %watermark magic used below.
%load_ext watermark

In [2]:
# Record the run timestamp, interpreter versions and host hardware with the notebook.
%watermark

Last updated: 2022-08-27T17:00:53.245832-04:00

Python implementation: CPython
Python version       : 3.8.10
IPython version      : 7.28.0

Compiler    : GCC 9.3.0
OS          : Linux
Release     : 5.11.0-37-generic
Machine     : x86_64
Processor   : x86_64
CPU cores   : 32
Architecture: 64bit



In [3]:
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from dask import dataframe as dd
from dask.delayed import delayed
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import optuna
import gc
import logging

In [4]:
# Record the versions of the packages imported above.
%watermark --iversions

xgboost: 1.6.2
logging: 0.5.1.2
pandas : 1.4.3
optuna : 2.10.1
dask   : 2022.7.1
numpy  : 1.22.4



In [5]:
# List the GPUs visible on this machine.
!nvidia-smi -L

GPU 0: NVIDIA TITAN RTX (UUID: GPU-0eb32f58-b8d5-17c0-e952-f4ec26a9353f)
GPU 1: NVIDIA TITAN RTX (UUID: GPU-50aeb092-88f5-4e0b-7f73-32741666f319)


In [6]:
# Start a local dask-cuda cluster with two workers and connect a client to it.
# NOTE(review): n_workers=2 presumably means one worker per GPU listed by
# `nvidia-smi -L` above -- adjust when running on a different machine.
cluster = LocalCUDACluster(n_workers=2)
client = Client(cluster)

2022-08-27 17:01:05,935 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize
2022-08-27 17:01:05,935 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize
2022-08-27 17:01:05,939 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize
2022-08-27 17:01:05,939 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize


In [7]:
# Snapshot GPU memory and utilisation right after the cluster has been started.
!nvidia-smi

Sat Aug 27 17:01:07 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA TITAN RTX    On   | 00000000:09:00.0 Off |                  N/A |
| 41%   35C    P2    64W / 280W |    167MiB / 24220MiB |      1%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA TITAN RTX    On   | 00000000:41:00.0  On |                  N/A |
| 41%   35C    P2    71W / 280W |    616MiB / 24217MiB |      5%      Default |
|       

In [8]:
def _read_fold(path):
    """Lazily read one fold CSV and split it into ``(features, target)``.

    The file is wrapped in a single delayed ``pd.read_csv`` call and turned
    into a dask DataFrame; the 'target' column is returned separately from the
    remaining columns.
    """
    # NOTE(review): the frame is built from a single delayed object, which
    # presumably yields one partition per fold -- confirm whether that is why
    # each training run only reports a single rank in the logs.
    frame = dd.from_delayed(delayed(pd.read_csv)(path))
    feature_columns = frame.columns.difference(['target'])
    return frame[feature_columns], frame['target']


# Per-fold feature frames and label series, indexed by fold number.
train_folds, val_folds = [], []
train_ys, val_ys = [], []

for i in range(5):
    print(f'Loading fold {i}')
    train_fold, train_y = _read_fold(f'../input/xgtrain_fold_{i}.csv.gz')
    val_fold, val_y = _read_fold(f'../input/xgval_fold_{i}.csv.gz')

    train_folds.append(train_fold)
    val_folds.append(val_fold)

    train_ys.append(train_y)
    val_ys.append(val_y)

Loading fold 0
Loading fold 1
Loading fold 2
Loading fold 3
Loading fold 4


In [9]:
# Full training table; only its 'label' column is extracted here as the
# ground-truth label array.
train = pd.read_csv('../input/train.csv.gz')

target = train['label'].to_numpy()
target

array([2, 9, 6, ..., 8, 8, 7])

In [10]:
# Out-of-fold predictions, one slot per training row. objective() overwrites
# this buffer in place on every trial.
train_oof = np.zeros((target.shape[0],))

# Number of boosting rounds used for every fold of every trial.
num_round = 1000

def objective(trial):
    """Optuna objective: 5-fold out-of-fold accuracy of a GPU XGBoost model.

    Samples one set of hyperparameters from ``trial``, trains one booster per
    fold with ``xgb.dask.train`` on the module-level ``train_folds`` /
    ``train_ys``, predicts the matching ``val_folds`` entry and writes those
    predictions into the module-level ``train_oof`` buffer.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        ``accuracy_score(target, train_oof)`` computed over all rows.
    """
    params = {
        'objective': 'multi:softmax',
        'num_class': 10,
        # Single-choice categoricals keep the value recorded with each trial.
        'tree_method': trial.suggest_categorical('tree_method', ['gpu_hist']),
        # suggest_float replaces suggest_loguniform / suggest_uniform, which
        # are deprecated in Optuna 3.x; the sampled distributions are the same.
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.3, 1.0),
        'subsample': trial.suggest_float('subsample', 0.4, 1.0),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        'max_depth': trial.suggest_categorical('max_depth', [3,5,7,9,11,13,15,17,20]),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
        # NOTE(review): no evals are passed to xgb.dask.train below, so this
        # metric does not appear to be computed; for a 10-class objective
        # 'mlogloss' looks like the intended name -- confirm before changing,
        # because altering the choices breaks resuming an existing study.
        'eval_metric': trial.suggest_categorical('eval_metric', ['logloss']),
        'random_state': 137,
    }

    kf = StratifiedKFold(5, shuffle=True, random_state=1974)

    # Only the validation indices are needed here; the training rows come from
    # the pre-split fold files.
    # NOTE(review): this assumes the fold files were written from this exact
    # split (same seed, same row order); otherwise predictions are stored
    # against the wrong rows. Confirm against the script that produced them.
    for i, (_, val_index) in enumerate(kf.split(train, target)):
        dtrain = xgb.dask.DaskDMatrix(client, train_folds[i], train_ys[i])
        dval = xgb.dask.DaskDMatrix(client, val_folds[i], val_ys[i])

        output = xgb.dask.train(client, params, dtrain, num_round)
        booster = output['booster']  # booster is the trained model
        booster.set_param({'predictor': 'gpu_predictor'})
        predictions = xgb.dask.predict(client, booster, dval).compute()
        train_oof[val_index] = predictions

        # Drop every reference to this fold's matrices and model before the
        # next fold is built.
        del dtrain, dval, output, booster, predictions
        gc.collect()

    return accuracy_score(target, train_oof)

In [11]:
# Sanity check: the out-of-fold buffer has one slot per training row.
train_oof.shape

(60000,)

In [12]:
# Send INFO-level records from the root logger to a log file.
logger = logging.getLogger()
logger.setLevel(logging.INFO)  # Setup the root logger.

# Attach the file handler only once: re-running this cell would otherwise stack
# handlers on the root logger and write every record several times.
_LOG_HANDLER_NAME = "optuna_xgb_output_0"
if all(handler.name != _LOG_HANDLER_NAME for handler in logger.handlers):
    _file_handler = logging.FileHandler("optuna_xgb_output_0.log", mode="w")
    _file_handler.name = _LOG_HANDLER_NAME
    logger.addHandler(_file_handler)

optuna.logging.enable_propagation()  # Propagate logs to the root logger.
optuna.logging.disable_default_handler()  # Stop showing logs in sys.stderr.

# load_if_exists=True resumes the study when it is already present in the
# SQLite file, instead of raising DuplicatedStudyError on a kernel restart.
study = optuna.create_study(
    storage="sqlite:///xgb_optuna_mnist_0.db",
    study_name="five_fold_optuna_xgb_0",
    direction='maximize',
    load_if_exists=True,
)

In [13]:
%%time
# First batch: run 3 trials of `objective` on the study and time the cell.
logger.info("Start optimization.")
study.optimize(objective, n_trials=3)

  client.wait_for_workers(n_workers)
[17:01:30] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[17:02:22] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[17:03:12] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[17:04:02] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[17:04:52] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[17:05:43] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[17:06:30] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[17:07:17] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[17:08:03] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)


CPU times: user 30 s, sys: 13 s, total: 43.1 s
Wall time: 13min 20s


In [14]:
# Collect number, objective value, sampled params and state of every trial
# into a DataFrame and preview the first rows.
df = study.trials_dataframe(attrs=('number', 'value', 'params', 'state'))
df.head()

Unnamed: 0,number,value,params_alpha,params_colsample_bytree,params_eval_metric,params_lambda,params_learning_rate,params_max_depth,params_min_child_weight,params_subsample,params_tree_method,state
0,0,0.866717,3.566109,0.33699,logloss,0.110663,0.080941,17,289,0.400894,gpu_hist,COMPLETE
1,1,0.885717,0.003675,0.470387,logloss,0.004926,0.087824,13,294,0.759316,gpu_hist,COMPLETE
2,2,0.8871,0.732165,0.957175,logloss,0.001125,0.04588,9,286,0.947977,gpu_hist,COMPLETE


In [15]:
%%time
# Run 5 more trials on the same study (trials accumulate across calls), then
# snapshot every trial to CSV, overwriting the previous snapshot.
study.optimize(objective, n_trials=5)
df = study.trials_dataframe(attrs=('number', 'value', 'params', 'state'))
df.to_csv('optuna_xgb_output_0.csv', index=False)
# NOTE: head() shows the earliest trials, not the ones that just ran.
df.head()

  client.wait_for_workers(n_workers)
[17:14:51] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[17:17:51] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[17:20:48] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[17:23:44] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[17:26:42] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[17:29:38] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[17:31:06] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[17:32:33] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[17:33:59] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)


CPU times: user 1min 53s, sys: 1min 1s, total: 2min 54s
Wall time: 1h 9min 39s


Unnamed: 0,number,value,params_alpha,params_colsample_bytree,params_eval_metric,params_lambda,params_learning_rate,params_max_depth,params_min_child_weight,params_subsample,params_tree_method,state
0,0,0.866717,3.566109,0.33699,logloss,0.110663,0.080941,17,289,0.400894,gpu_hist,COMPLETE
1,1,0.885717,0.003675,0.470387,logloss,0.004926,0.087824,13,294,0.759316,gpu_hist,COMPLETE
2,2,0.8871,0.732165,0.957175,logloss,0.001125,0.04588,9,286,0.947977,gpu_hist,COMPLETE
3,3,0.88085,0.097329,0.357466,logloss,1.2063,0.004286,17,38,0.67119,gpu_hist,COMPLETE
4,4,0.8662,0.008397,0.573746,logloss,0.202615,0.0062,9,205,0.932661,gpu_hist,COMPLETE


In [16]:
%%time
# Run 10 more trials on the same study (trials accumulate across calls), then
# snapshot every trial to CSV, overwriting the previous snapshot.
study.optimize(objective, n_trials=10)
df = study.trials_dataframe(attrs=('number', 'value', 'params', 'state'))
df.to_csv('optuna_xgb_output_0.csv', index=False)
# NOTE: head() shows the earliest trials, not the ones that just ran.
df.head()

  client.wait_for_workers(n_workers)
[18:24:31] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[18:25:48] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[18:27:03] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[18:28:18] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[18:29:33] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[18:30:49] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[18:32:09] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[18:33:29] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[18:34:48] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)


CPU times: user 4min 5s, sys: 2min 11s, total: 6min 16s
Wall time: 2h 29min 52s


Unnamed: 0,number,value,params_alpha,params_colsample_bytree,params_eval_metric,params_lambda,params_learning_rate,params_max_depth,params_min_child_weight,params_subsample,params_tree_method,state
0,0,0.866717,3.566109,0.33699,logloss,0.110663,0.080941,17,289,0.400894,gpu_hist,COMPLETE
1,1,0.885717,0.003675,0.470387,logloss,0.004926,0.087824,13,294,0.759316,gpu_hist,COMPLETE
2,2,0.8871,0.732165,0.957175,logloss,0.001125,0.04588,9,286,0.947977,gpu_hist,COMPLETE
3,3,0.88085,0.097329,0.357466,logloss,1.2063,0.004286,17,38,0.67119,gpu_hist,COMPLETE
4,4,0.8662,0.008397,0.573746,logloss,0.202615,0.0062,9,205,0.932661,gpu_hist,COMPLETE


In [17]:
%%time
# Run 40 more trials on the same study (trials accumulate across calls), then
# snapshot every trial to CSV, overwriting the previous snapshot.
study.optimize(objective, n_trials=40)
df = study.trials_dataframe(attrs=('number', 'value', 'params', 'state'))
df.to_csv('optuna_xgb_output_0.csv', index=False)
# NOTE: head() shows the earliest trials, not the ones that just ran.
df.head()

  client.wait_for_workers(n_workers)
[22:39:28] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[22:40:45] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[22:42:01] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[22:43:17] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[22:44:35] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[22:45:52] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[22:47:32] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[22:49:11] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[22:50:50] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)


CPU times: user 11min 58s, sys: 5min 48s, total: 17min 46s
Wall time: 6h 40min 6s


Unnamed: 0,number,value,params_alpha,params_colsample_bytree,params_eval_metric,params_lambda,params_learning_rate,params_max_depth,params_min_child_weight,params_subsample,params_tree_method,state
0,0,0.866717,3.566109,0.33699,logloss,0.110663,0.080941,17,289,0.400894,gpu_hist,COMPLETE
1,1,0.885717,0.003675,0.470387,logloss,0.004926,0.087824,13,294,0.759316,gpu_hist,COMPLETE
2,2,0.8871,0.732165,0.957175,logloss,0.001125,0.04588,9,286,0.947977,gpu_hist,COMPLETE
3,3,0.88085,0.097329,0.357466,logloss,1.2063,0.004286,17,38,0.67119,gpu_hist,COMPLETE
4,4,0.8662,0.008397,0.573746,logloss,0.202615,0.0062,9,205,0.932661,gpu_hist,COMPLETE


In [18]:
# Show the most recent trials; df.head() above only ever shows the first five.
df.tail()

Unnamed: 0,number,value,params_alpha,params_colsample_bytree,params_eval_metric,params_lambda,params_learning_rate,params_max_depth,params_min_child_weight,params_subsample,params_tree_method,state
53,53,0.909,0.001382,0.480116,logloss,5.213399,0.046748,7,5,0.882155,gpu_hist,COMPLETE
54,54,0.90705,0.001591,0.455146,logloss,1.024063,0.063497,7,33,0.846706,gpu_hist,COMPLETE
55,55,0.899533,0.001021,0.503568,logloss,5.679056,0.021861,7,54,0.96018,gpu_hist,COMPLETE
56,56,0.908,1.87259,0.399854,logloss,3.435887,0.057104,7,20,0.9094,gpu_hist,COMPLETE
57,57,0.910217,0.003146,0.585522,logloss,0.375291,0.082491,9,1,0.853379,gpu_hist,COMPLETE


In [19]:
%%time
# Run 20 more trials on the same study (trials accumulate across calls), then
# snapshot every trial to CSV, overwriting the previous snapshot.
study.optimize(objective, n_trials=20)
df = study.trials_dataframe(attrs=('number', 'value', 'params', 'state'))
df.to_csv('optuna_xgb_output_0.csv', index=False)
# NOTE: head() shows the earliest trials, not the ones that just ran.
df.head()

  client.wait_for_workers(n_workers)
[09:23:37] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[09:26:21] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[09:29:06] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[09:31:49] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[09:34:33] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[09:37:18] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[09:43:20] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)
[09:49:20] task [xgboost.dask-1]:tcp://127.0.0.1:41589 got new rank 0
  client.wait_for_workers(n_workers)
[09:55:19] task [xgboost.dask-0]:tcp://127.0.0.1:40161 got new rank 0
  client.wait_for_workers(n_workers)


CPU times: user 7min 14s, sys: 3min 31s, total: 10min 45s
Wall time: 3h 57min 35s


Unnamed: 0,number,value,params_alpha,params_colsample_bytree,params_eval_metric,params_lambda,params_learning_rate,params_max_depth,params_min_child_weight,params_subsample,params_tree_method,state
0,0,0.866717,3.566109,0.33699,logloss,0.110663,0.080941,17,289,0.400894,gpu_hist,COMPLETE
1,1,0.885717,0.003675,0.470387,logloss,0.004926,0.087824,13,294,0.759316,gpu_hist,COMPLETE
2,2,0.8871,0.732165,0.957175,logloss,0.001125,0.04588,9,286,0.947977,gpu_hist,COMPLETE
3,3,0.88085,0.097329,0.357466,logloss,1.2063,0.004286,17,38,0.67119,gpu_hist,COMPLETE
4,4,0.8662,0.008397,0.573746,logloss,0.202615,0.0062,9,205,0.932661,gpu_hist,COMPLETE


2022-08-29 12:55:34,539 - distributed.nanny - ERROR - Worker process died unexpectedly
2022-08-29 12:55:34,539 - distributed.nanny - ERROR - Worker process died unexpectedly
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/queues.py", line 245, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 411, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
