<a href="https://colab.research.google.com/github/yl4970/Kaggle-Posion_Mushrooms/blob/main/optuna_xgb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
# pip install optuna
# pip install category-encoders

In [19]:
import optuna
import xgboost as xgb
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import matthews_corrcoef

from train_prep import *

In [None]:
def objective(trial, X, y):

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)

    params = {
            'objective': 'binary:logistic',
            'n_estimators': 300,
            'alpha': trial.suggest_loguniform('alpha', 1e-8, 1.0),
            'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.1, 1.0),
            'eta': trial.suggest_loguniform('eta', 0.01, 0.1),
            'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),
            'lambda': trial.suggest_loguniform('lambda', 1e-8, 0.1),
            'max_depth': trial.suggest_int("max_depth", 13, 23, step=2),
            'min_child_weight':  trial.suggest_int("min_child_weight", 2, 10),
            'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 1e-5),
            'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 1e-5),
            'subsample': trial.suggest_uniform('subsample', 0.1, 1.0),
            "verbosity": 0,
            "tree_method": "exact"
            }

    # Create the DMatrix
    dtrain = xgb.DMatrix(X_train, label=y_train)

    # Train the model
    model = xgb.train(params, dtrain)

    # Predict probabilities for the test set
    dtest = xgb.DMatrix(X_test)
    y_pred = model.predict(dtest)

    # Convert results into binary
    y_pred = y_pred.round(decimals=0)

    # Calculate the MCC score
    mcc = matthews_corrcoef(y_test, y_pred)

    return mcc

study = optuna.create_study(direction='maximize')
func = lambda trial: objective(trial, X_enc, y)
study.optimize(func, n_trials=100)

# Print the best MCC score and the best parameters
print('Best MCC:', study.best_value)
print('Best Params:', study.best_params)


[I 2024-09-10 20:18:40,642] A new study created in memory with name: no-name-91b33d19-f6a2-47ce-aa2a-104c8e9da0af
[I 2024-09-10 20:20:45,898] Trial 0 finished with value: 0.9312909566704621 and parameters: {'alpha': 0.00019640101454243607, 'colsample_bytree': 0.9284099642184097, 'eta': 0.019113489820449855, 'gamma': 0.027045996757111543, 'lambda': 1.2938759724721471e-08, 'max_depth': 13, 'min_child_weight': 5, 'reg_alpha': 7.667019722486657e-07, 'reg_lambda': 1.933019956912997e-08, 'subsample': 0.940136228985871}. Best is trial 0 with value: 0.9312909566704621.
[I 2024-09-10 20:22:00,183] Trial 1 finished with value: 0.7656423862013898 and parameters: {'alpha': 0.0001367613482958315, 'colsample_bytree': 0.3959341079813814, 'eta': 0.027783773579381714, 'gamma': 0.2213489556606143, 'lambda': 2.4085582829815835e-06, 'max_depth': 19, 'min_child_weight': 9, 'reg_alpha': 1.1928105236251119e-08, 'reg_lambda': 4.144707331934054e-06, 'subsample': 0.3072042246615012}. Best is trial 0 with value: