# Training Models

In [49]:
from sklearn.ensemble import GradientBoostingClassifier, StackingClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix
from xgboost import XGBClassifier
# from catboost import CatBoostClassifier

# Import Data

In [50]:
import pandas as pd 
import numpy as np

In [51]:
xtrain = pd.read_csv('scaled_data/xtrain.csv', index_col = 0)
xtest = pd.read_csv('scaled_data/xtest.csv', index_col = 0)
ytrain = pd.read_csv('scaled_data/ytrain.csv', index_col = 0)
ytest = pd.read_csv('scaled_data/ytest.csv', index_col = 0)

In [52]:
train = pd.read_csv('scaled_data/train_scaled.csv', index_col = 0)
test = pd.read_csv('scaled_data/test_scaled.csv', index_col = 0)

In [53]:
# for the labels and the id
train_raw = pd.read_csv('data/train_sample.csv', index_col = 0)
test_raw = pd.read_csv('data/test_sample.csv')

# Hyperparameter Tuning

In [54]:
import optuna
from optuna_integration.xgboost import XGBoostPruningCallback
from sklearn.model_selection import StratifiedKFold

ModuleNotFoundError: No module named 'optuna_integration'

In [None]:
# define the objective function
def objective(trial: optuna.Trial, 
              xtrain: pd.DataFrame, 
              ytrain: pd.DataFrame, 
              xtest: pd.DataFrame, 
              ytest: pd.DataFrame) -> float:
    # define the hyperparameters to tune
    hyperparams = {
        'objective' : 'binary:logistic',
        'eval_metric' : 'auc',
        'tree_method' : 'gpu_hist',
        'seed' : 123,
        'n_estimators' : trial.suggest_int('n_estimators', 500, 1500),
        'learning_rate' : trial.suggest_float('learning_rate', 1e-8, 1, log=True),
        'max_depth' : trial.suggest_int('max_depth', 5, 15),
        'gamma' : trial.suggest_float('gamma', 0, 20),
        'min_weight_child' : trial.suggest_int('min_weight_child', 2, 20),
        'reg_alpha' : trial.suggest_float('reg_alpha', 1e-2, 0.1),
        'reg_lambda' : trial.suggest_float('reg_lambda', 1e-2, 0.1),
        'subsample' : trial.suggest_float('subsample', 0.5, 0.9),
        'colsample_bytree' : trial.suggest_float('colsample_bytree', 0.5, 0.9),
        'colsample_bylevel' : trial.suggest_float('colsample_bylevel', 0.5, 0.9),
        'colsample_bynode' : trial.suggest_float('colsample_bynode', 0.5, 0.9),
        'grow_policy' : trial.suggest_categorical('grow_policy', ['depthwise', 'lossguide'])
    }

    # pruning callback
    pruning_callback = XGBoostPruningCallback(trial, 'validation_0-auc')

    # train model
    xgb_clf = XGBClassifier(
        **hyperparams,
        #callbacks = [pruning_callback],
        early_stopping_rounds = 50,
        use_label_encoder = False
    )

    xgb_clf.fit(xtrain, ytrain, verbose = False)

    y_pred_proba = xgb_clf.predict_proba(xtest)

    roc_auc = roc_auc_score(ytest, y_pred_proba[:, 1])

    return float(roc_auc)

In [None]:
study = optuna.create_study(direction = 'maximize')
study.optimize(
    lambda trial: objective(trial, 
                            xtrain = xtrain, 
                            ytrain = ytrain, 
                            xtest = xtest, 
                            ytest = ytest), 
    n_trials=50)

[I 2024-07-28 19:10:54,274] A new study created in memory with name: no-name-91a5e7d1-b529-4314-9b35-6bb80e37ba91
[W 2024-07-28 19:10:54,381] Trial 0 failed with parameters: {'n_estimators': 957, 'learning_rate': 0.00019237353364109995, 'max_depth': 5, 'gamma': 6.644639718752043, 'min_weight_child': 2, 'reg_alpha': 0.08550108567514732, 'reg_lambda': 0.03819655209609482, 'subsample': 0.6392801292061446, 'colsample_bytree': 0.6934038286501232, 'colsample_bylevel': 0.5066454617423846, 'colsample_bynode': 0.5606796547958798, 'grow_policy': 'lossguide'} because of the following error: XGBoostError('[19:10:54] /Users/runner/work/xgboost/xgboost/src/gbm/../common/common.h:174: XGBoost version not compiled with GPU support.\nStack trace:\n  [bt] (0) 1   libxgboost.dylib                    0x00000001266fce28 dmlc::LogMessageFatal::~LogMessageFatal() + 124\n  [bt] (1) 2   libxgboost.dylib                    0x00000001268c4258 xgboost::gbm::GBTree::Configure(std::__1::vector<std::__1::pair<std::_

XGBoostError: [19:10:54] /Users/runner/work/xgboost/xgboost/src/gbm/../common/common.h:174: XGBoost version not compiled with GPU support.
Stack trace:
  [bt] (0) 1   libxgboost.dylib                    0x00000001266fce28 dmlc::LogMessageFatal::~LogMessageFatal() + 124
  [bt] (1) 2   libxgboost.dylib                    0x00000001268c4258 xgboost::gbm::GBTree::Configure(std::__1::vector<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>>> const&) + 1256
  [bt] (2) 3   libxgboost.dylib                    0x00000001268e3248 xgboost::LearnerConfiguration::Configure() + 924
  [bt] (3) 4   libxgboost.dylib                    0x000000012671db2c XGBoosterBoostedRounds + 100
  [bt] (4) 5   libffi.dylib                        0x0000000191327050 ffi_call_SYSV + 80
  [bt] (5) 6   libffi.dylib                        0x000000019132fadc ffi_call_int + 1208
  [bt] (6) 7   _ctypes.cpython-311-darwin.so       0x00000001035c816c _ctypes_callproc + 1372
  [bt] (7) 8   _ctypes.cpython-311-darwin.so       0x00000001035c2164 PyCFuncPtr_call + 204
  [bt] (8) 9   Python                              0x0000000101e89738 _PyObject_MakeTpCall + 128



In [None]:
print('Best hyperparameters: ', study.best_params)

# Choosing Models