# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.metrics import roc_auc_score, make_scorer
from lightgbm import LGBMClassifier

# Single seed reused by every randomized step below
SEED = 42

# Step 1: Hold out 33% of the rows as a test set, stratified on the label
# (X and y are expected to be defined earlier in the notebook)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.33,
    random_state=SEED,
)

# Step 2: Base LightGBM classifier shared by every grid-search candidate
lgb_base = LGBMClassifier(
    class_weight='balanced',  # reweight classes inversely to their frequency
    random_state=SEED,
    # Row subsampling (`subsample` / bagging_fraction) is ignored by LightGBM
    # unless the bagging frequency is positive; the default of 0 would make
    # the `subsample` values in the search grid produce identical models.
    subsample_freq=1,
    n_jobs=-1  # multi-threaded training
)

# Step 3: Hyperparameter grid to search (can be tuned further);
# 2 * 3 * 3 * 2 * 2 = 72 candidate combinations
param_grid = dict(
    n_estimators=[100, 200],
    max_depth=[3, 5, 7],
    learning_rate=[0.01, 0.05, 0.1],
    subsample=[0.8, 1.0],
    colsample_bytree=[0.8, 1.0],
)

# Step 4: Cross-validation splitter and AUC scorer
# Stratified 5-fold split, shuffled with the fixed seed for reproducibility
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
# Use scikit-learn's built-in "roc_auc" scorer name instead of
# make_scorer(roc_auc_score, needs_proba=True): the `needs_proba` argument
# was deprecated in scikit-learn 1.4 and removed in 1.6, so the old form
# fails on current releases. GridSearchCV accepts the string directly.
scorer = "roc_auc"

# Step 5: Exhaustive grid search over param_grid, scored with `scorer`
# on the folds produced by `cv`
# NOTE(review): both the search and the base estimator request n_jobs=-1;
# this may oversubscribe CPU cores — confirm on the target machine.
grid_search = GridSearchCV(
    estimator=lgb_base,
    param_grid=param_grid,
    scoring=scorer,
    cv=cv,
    n_jobs=-1,
    verbose=1,
)

# Fit every candidate on the training split only; the test split stays unseen
grid_search.fit(X_train, y_train)

# Step 6: Evaluate the best estimator found by the search on the held-out test set
best_model = grid_search.best_estimator_
# Column 1 of predict_proba is used as the score (presumably the positive class)
y_prob_test = best_model.predict_proba(X_test)[:, 1]
auc_test = roc_auc_score(y_true=y_test, y_score=y_prob_test)

print("✅ Best params:", grid_search.best_params_)
print(f"✅ Test AUC with best LightGBM model: {auc_test:.4f}")
