In [None]:
import numpy as np
import optuna
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler

# Build a synthetic binary-classification dataset (swap in real data here).
np.random.seed(42)
n_samples, n_features = 1000, 5
X = np.random.rand(n_samples, n_features) * 10  # uniform draws scaled to [0, 10)
noise = np.random.normal(0, 1, n_samples)
# The label is 1 when the noisy linear score is positive: the score rises with
# feature 0 and falls with feature 1; the remaining features do not enter it.
linear_score = 2 * X[:, 0] - 3 * X[:, 1] + noise
y = (linear_score > 0).astype(int)

# Per-feature monotonicity constraints handed to XGBoost (assumed known):
# +1 = increasing, -1 = decreasing, 0 = unconstrained.
monotonic_constraints = (1, -1, 0, 0, 1)

# Hold out 20% of the rows for validation, preserving the class ratio.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardize features; the scaler is fitted on the training rows only and
# then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_valid = scaler.transform(X_valid)

# Define the objective function for Optuna
def objective(trial):
    """Score one Optuna trial by validation ROC AUC.

    Samples a set of XGBoost hyperparameters from ``trial``, fits a
    500-tree ``XGBClassifier`` (no early stopping) on the module-level
    ``X_train``/``y_train`` with the module-level monotone constraints, and
    returns the ROC AUC of its positive-class probabilities on
    ``X_valid``/``y_valid``.
    """
    # Values searched by Optuna for this trial.
    sampled = {
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "min_child_weight": trial.suggest_float("min_child_weight", 1, 10),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "lambda": trial.suggest_float("lambda", 1e-3, 10.0),  # L2 regularization
        "alpha": trial.suggest_float("alpha", 1e-3, 10.0),  # L1 regularization
        "scale_pos_weight": trial.suggest_float("scale_pos_weight", 0.5, 3.0),
    }

    # Settings held constant across all trials.
    fixed = {
        "objective": "binary:logistic",
        "tree_method": "hist",
        "n_estimators": 500,
        "monotone_constraints": monotonic_constraints,
    }

    classifier = xgb.XGBClassifier(**fixed, **sampled)
    classifier.fit(X_train, y_train)

    # Column 1 of predict_proba is the probability of the positive class.
    positive_proba = classifier.predict_proba(X_valid)[:, 1]
    return roc_auc_score(y_valid, positive_proba)

# Run Optuna optimization. The sampler is seeded so the search is
# reproducible, in line with the fixed seeds used for data generation and the
# train/validation split. TPE is Optuna's default sampler, so only the seed
# is new here.
study = optuna.create_study(
    direction="maximize",  # Maximize AUC
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=5)

# study.best_params contains only the values suggested through `trial`.
# The settings that `objective` keeps fixed for every trial are restored
# here; otherwise the final model would silently fall back to XGBoost's
# defaults (e.g. for the number of trees) and would not correspond to the
# configuration that achieved the best trial score.
best_params = {
    "objective": "binary:logistic",
    "tree_method": "hist",
    "n_estimators": 500,
    **study.best_params,
    "monotone_constraints": monotonic_constraints,
}
print("Best Parameters:", best_params)

# Train final model with the full best-trial configuration
final_model = xgb.XGBClassifier(**best_params)
final_model.fit(X_train, y_train)

# Evaluate final model.
# NOTE(review): this reuses the validation split that the hyperparameters
# were tuned on, so the reported AUC is optimistically biased; use a separate
# held-out test set for an unbiased estimate.
y_pred = final_model.predict_proba(X_valid)[:, 1]  # Probability of the positive class
auc_score = roc_auc_score(y_valid, y_pred)
print(f"Final Model AUC: {auc_score:.4f}")
