In [3]:
import numpy as np
from scipy.sparse import load_npz
import xgboost as xgb
from sklearn.metrics import roc_auc_score


In [4]:
# Directory holding the preprocessed, feature-engineered artifacts.
# Kept in one place so the location only has to be changed once.
DATA_DIR = "../data_process/processed_fe"

# Feature matrices, stored on disk in SciPy's sparse .npz format.
X_train = load_npz(f"{DATA_DIR}/X_train_proc_fe.npz")
X_valid = load_npz(f"{DATA_DIR}/X_valid_proc_fe.npz")
X_test  = load_npz(f"{DATA_DIR}/X_test_proc_fe.npz")

# Label vectors for the train / validation splits (no test labels are loaded).
y_train = np.load(f"{DATA_DIR}/y_train_fe.npy")
y_valid = np.load(f"{DATA_DIR}/y_valid_fe.npy")

# Fail fast if the artifacts are out of sync (e.g. produced by different
# preprocessing runs) instead of hitting an obscure error during training.
assert X_train.shape[0] == y_train.shape[0], "X_train / y_train row count mismatch"
assert X_valid.shape[0] == y_valid.shape[0], "X_valid / y_valid row count mismatch"
assert X_train.shape[1] == X_valid.shape[1] == X_test.shape[1], (
    "train / valid / test feature counts differ"
)

print("X_train:", X_train.shape)
print("X_valid:", X_valid.shape)
print("Type:", type(X_train))


X_train: (475195, 67)
X_valid: (118799, 67)
Type: <class 'scipy.sparse._csr.csr_matrix'>


In [5]:
# Wrap each split's features and labels in XGBoost's DMatrix container,
# which is the input type the native xgb.train / Booster.predict API takes.
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dvalid = xgb.DMatrix(data=X_valid, label=y_valid)


In [6]:
# Booster hyper-parameters passed to xgb.train.
params = {
    "objective": "binary:logistic",   # binary classification, outputs probabilities
    "eval_metric": "auc",             # metric reported for every eval set
    "eta": 0.03,                      # learning rate
    "max_depth": 6,
    "subsample": 0.8,                 # row subsampling per tree
    "colsample_bytree": 0.8,          # column subsampling per tree
    "tree_method": "hist",            # recommended on Windows: faster and avoids memory blow-up
    # Row/column subsampling is random, so pin the seed explicitly rather than
    # relying on the library default. 0 is XGBoost's documented default, so
    # previously recorded results remain valid.
    "seed": 0,
}


In [7]:
# Watchlist of (DMatrix, display name) pairs evaluated after each boosting round.
# Per the xgboost docs, early stopping monitors the LAST entry, so the
# validation set must stay at the end of this list.
evals = [
    (dtrain, "train"),
    (dvalid, "valid"),
]

# Train for at most 2000 rounds; stop once the monitored metric has not
# improved for 100 consecutive rounds. Progress is logged every 100 rounds.
model = xgb.train(
    params=params,
    dtrain=dtrain,
    num_boost_round=2000,
    early_stopping_rounds=100,
    evals=evals,
    verbose_eval=100,
)


[0]	train-auc:0.91146	valid-auc:0.91043
[100]	train-auc:0.91569	valid-auc:0.91449
[200]	train-auc:0.91866	valid-auc:0.91662
[300]	train-auc:0.92095	valid-auc:0.91763
[400]	train-auc:0.92285	valid-auc:0.91833
[500]	train-auc:0.92463	valid-auc:0.91896
[600]	train-auc:0.92621	valid-auc:0.91937
[700]	train-auc:0.92771	valid-auc:0.91970
[800]	train-auc:0.92902	valid-auc:0.91992
[900]	train-auc:0.93036	valid-auc:0.92021
[1000]	train-auc:0.93156	valid-auc:0.92040
[1100]	train-auc:0.93271	valid-auc:0.92049
[1200]	train-auc:0.93382	valid-auc:0.92058
[1300]	train-auc:0.93487	valid-auc:0.92065
[1400]	train-auc:0.93586	valid-auc:0.92068
[1500]	train-auc:0.93682	valid-auc:0.92069
[1533]	train-auc:0.93714	valid-auc:0.92069


In [8]:
# Score the validation set using only the trees up to the best early-stopping
# round. iteration_range is half-open per the xgboost docs, hence the +1.
best_round_end = model.best_iteration + 1
y_valid_pred = model.predict(dvalid, iteration_range=(0, best_round_end))

# Recompute AUC with scikit-learn on the predicted scores.
auc_xgb = roc_auc_score(y_valid, y_valid_pred)
print("XGBoost AUC:", auc_xgb)


XGBoost AUC: 0.9206955595733336
