In [8]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import optuna
import gc
xgb.__version__

'1.5.0'

In [2]:
import pandas as pd


# Base location of the homework dataset; every split lives under this URL.
DATA_URL = 'https://raw.githubusercontent.com/rasbt/stat451-machine-learning-fs21/main/hw02-starter/dataset'


def _read_array(filename):
    """Read the header-less CSV `filename` under DATA_URL and return its values as a NumPy array."""
    return pd.read_csv(f'{DATA_URL}/{filename}', header=None).values


X_train = _read_array('X_train.csv')
# Label files hold one column; flatten to 1-D and cast to integer class ids.
y_train = _read_array('y_train.csv').ravel().astype(int)

X_test = _read_array('X_test.csv')
y_test = _read_array('y_test.csv').ravel().astype(int)

print('X_train.shape:', X_train.shape)
print('y_train.shape:', y_train.shape)
print('X_test.shape:', X_test.shape)
print('y_test.shape:', y_test.shape)

X_train.shape: (9119, 16)
y_train.shape: (9119,)
X_test.shape: (4492, 16)
y_test.shape: (4492,)


In [3]:
# Hold out a stratified 20% of the training data as a validation set;
# the fixed random_state keeps the split repeatable.
(X_train_sub, X_valid,
 y_train_sub, y_valid) = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=1,
    stratify=y_train,
)

print('Train/Valid/Test sizes:', len(y_train_sub), len(y_valid), len(y_test))

Train/Valid/Test sizes: 7295 1824 4492


In [4]:
# Display the labels of the training subset produced by the split.
y_train_sub

array([3, 0, 3, ..., 3, 4, 2])

In [12]:
# Display the test-set labels loaded from y_test.csv.
y_test

array([2, 0, 5, ..., 4, 6, 3])

In [5]:
# List the distinct class labels present in the training subset.
np.unique(y_train_sub)

array([0, 1, 2, 3, 4, 5, 6])

In [6]:
# Wrap the training subset and the validation split in XGBoost DMatrix
# containers (features plus labels) for use with xgb.train / Booster.predict.
dtrain, dvalid = (
    xgb.DMatrix(features, label=labels)
    for features, labels in (
        (X_train_sub, y_train_sub),
        (X_valid, y_valid),
    )
)

In [7]:
# Number of boosting rounds passed to xgb.train for every trial.
num_round = 1_000

In [16]:
def objective(trial):
    """Optuna objective: train one XGBoost model and return its validation accuracy.

    A hyperparameter set is sampled from ``trial``, a booster is trained on the
    module-level ``dtrain`` for ``num_round`` boosting rounds, class labels are
    predicted for ``dvalid`` and scored against ``y_valid``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Accuracy on the validation split (the study maximizes this value).
    """
    params = {
        # Fixed settings are sampled from single-choice categoricals so they
        # are still recorded in ``trial.params`` next to the tuned values.
        'objective': trial.suggest_categorical('objective', ['multi:softmax']),
        # NOTE(review): 'gpu_hist' presumably needs a CUDA-enabled XGBoost
        # build; 'hist' is the CPU alternative -- confirm for your environment.
        'tree_method': trial.suggest_categorical('tree_method', ['gpu_hist']),
        # suggest_float replaces the deprecated suggest_loguniform /
        # suggest_uniform helpers; names and ranges are unchanged.
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.3, 1.0),
        'subsample': trial.suggest_float('subsample', 0.4, 1.0),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        'max_depth': trial.suggest_categorical('max_depth', [3, 5, 7, 9, 11, 13, 15, 17, 20]),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
        'eval_metric': trial.suggest_categorical('eval_metric', ['mlogloss']),
        'num_class': trial.suggest_categorical('num_class', [7]),
    }

    model = xgb.train(params, dtrain, num_round)

    # The predictions are passed straight to accuracy_score as class labels.
    predictions = model.predict(dvalid)

    return accuracy_score(y_valid, predictions)

In [17]:
# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# after a kernel restart. TPESampler is Optuna's default sampler, so only the
# seeding differs from calling create_study() without a sampler.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=1),
)

[32m[I 2021-11-16 15:17:57,865][0m A new study created in memory with name: no-name-863f7508-4374-469d-9b93-9adec112af9d[0m


In [18]:
%%time
# Short 2-trial run (presumably a smoke test of the objective before the
# longer searches -- confirm intent).
study.optimize(objective, n_trials=2)

[32m[I 2021-11-16 15:18:02,245][0m Trial 0 finished with value: 0.9139254385964912 and parameters: {'objective': 'multi:softmax', 'tree_method': 'gpu_hist', 'lambda': 0.08959278301488241, 'alpha': 0.034589419521844654, 'colsample_bytree': 0.9658241624365731, 'subsample': 0.5808919143361457, 'learning_rate': 0.009581421719647683, 'max_depth': 9, 'min_child_weight': 67, 'eval_metric': 'mlogloss', 'num_class': 7}. Best is trial 0 with value: 0.9139254385964912.[0m
[32m[I 2021-11-16 15:18:09,675][0m Trial 1 finished with value: 0.8338815789473685 and parameters: {'objective': 'multi:softmax', 'tree_method': 'gpu_hist', 'lambda': 0.0261098832308896, 'alpha': 0.15112562041527816, 'colsample_bytree': 0.7836143885528984, 'subsample': 0.5217363054336831, 'learning_rate': 0.008015129316045467, 'max_depth': 17, 'min_child_weight': 259, 'eval_metric': 'mlogloss', 'num_class': 7}. Best is trial 0 with value: 0.9139254385964912.[0m


CPU times: user 11.4 s, sys: 80.6 ms, total: 11.4 s
Wall time: 11 s


In [19]:
%%time
# Run 100 more trials on the same study; trial numbering continues from the
# trials already recorded.
study.optimize(objective, n_trials=100)

[32m[I 2021-11-16 15:18:32,184][0m Trial 2 finished with value: 0.8766447368421053 and parameters: {'objective': 'multi:softmax', 'tree_method': 'gpu_hist', 'lambda': 2.1040184452366053, 'alpha': 0.005116629399450099, 'colsample_bytree': 0.7660548418045704, 'subsample': 0.9654036457617079, 'learning_rate': 0.04924854372979283, 'max_depth': 3, 'min_child_weight': 172, 'eval_metric': 'mlogloss', 'num_class': 7}. Best is trial 0 with value: 0.9139254385964912.[0m
[32m[I 2021-11-16 15:19:22,122][0m Trial 3 finished with value: 0.9237938596491229 and parameters: {'objective': 'multi:softmax', 'tree_method': 'gpu_hist', 'lambda': 0.02159971932830458, 'alpha': 0.3819006809371373, 'colsample_bytree': 0.6552831234656763, 'subsample': 0.9056665331256465, 'learning_rate': 0.09984134446658842, 'max_depth': 20, 'min_child_weight': 43, 'eval_metric': 'mlogloss', 'num_class': 7}. Best is trial 3 with value: 0.9237938596491229.[0m
[32m[I 2021-11-16 15:19:24,793][0m Trial 4 finished with value:

CPU times: user 27min 33s, sys: 6.01 s, total: 27min 39s
Wall time: 27min 2s


In [20]:
# Show the hyperparameters of the best trial recorded so far.
study.best_trial.params

{'objective': 'multi:softmax',
 'tree_method': 'gpu_hist',
 'lambda': 0.019613630447437882,
 'alpha': 0.2766220032240862,
 'colsample_bytree': 0.7194811956259057,
 'subsample': 0.9724036039411196,
 'learning_rate': 0.0019627396144725937,
 'max_depth': 9,
 'min_child_weight': 1,
 'eval_metric': 'mlogloss',
 'num_class': 7}

In [21]:
%%time
# Extend the existing study by another 100 trials; earlier trials are kept
# and the best trial is tracked across all of them.
study.optimize(objective, n_trials=100)

[32m[I 2021-11-16 16:17:13,598][0m Trial 102 finished with value: 0.930921052631579 and parameters: {'objective': 'multi:softmax', 'tree_method': 'gpu_hist', 'lambda': 0.04143089538227547, 'alpha': 0.13050157947426363, 'colsample_bytree': 0.6984381233460304, 'subsample': 0.7628713966213765, 'learning_rate': 0.0017725908739943043, 'max_depth': 9, 'min_child_weight': 2, 'eval_metric': 'mlogloss', 'num_class': 7}. Best is trial 61 with value: 0.9331140350877193.[0m
[32m[I 2021-11-16 16:17:23,914][0m Trial 103 finished with value: 0.9205043859649122 and parameters: {'objective': 'multi:softmax', 'tree_method': 'gpu_hist', 'lambda': 0.043293296474767574, 'alpha': 0.13380389065399795, 'colsample_bytree': 0.6953810442540308, 'subsample': 0.7620062716580102, 'learning_rate': 0.0018368078442897683, 'max_depth': 9, 'min_child_weight': 14, 'eval_metric': 'mlogloss', 'num_class': 7}. Best is trial 61 with value: 0.9331140350877193.[0m
[32m[I 2021-11-16 16:17:30,859][0m Trial 104 finished w

CPU times: user 27min 45s, sys: 5.81 s, total: 27min 51s
Wall time: 27min 5s
