In [2]:
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# 正确加载数据（避免使用生成器，直接获取数组形式）
data = load_iris()
X, y = data.data, data.target  # X为特征，y为标签

# 第一步：划分训练集和临时集（8:2）
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.2, random_state=42  # random_state保证结果可复现
)

# 第二步：将临时集划分为验证集和测试集（1:1）
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# 查看各数据集大小
print(f"训练集: {X_train.shape[0]} 样本")
print(f"验证集: {X_val.shape[0]} 样本")
print(f"测试集: {X_test.shape[0]} 样本")

# 初始化XGBoost模型（注意：鸢尾花是分类问题，应使用XGBClassifier）
xgb_clf = xgb.XGBClassifier()  # 而非XGBRegressor（回归器用于连续值预测）

# 用训练集训练，用验证集评估
xgb_clf.fit(X_train, y_train, eval_set=[(X_val, y_val)])

# 最终用测试集评估模型性能
test_score = xgb_clf.score(X_test, y_test)
print(f"测试集准确率: {test_score:.4f}")

训练集: 120 样本
验证集: 15 样本
测试集: 15 样本
[0]	validation_0-mlogloss:0.72461
[1]	validation_0-mlogloss:0.50611
[2]	validation_0-mlogloss:0.36622
[3]	validation_0-mlogloss:0.27081
[4]	validation_0-mlogloss:0.20370
[5]	validation_0-mlogloss:0.16001
[6]	validation_0-mlogloss:0.12878
[7]	validation_0-mlogloss:0.10098
[8]	validation_0-mlogloss:0.08663
[9]	validation_0-mlogloss:0.07307
[10]	validation_0-mlogloss:0.06455
[11]	validation_0-mlogloss:0.05507
[12]	validation_0-mlogloss:0.05027
[13]	validation_0-mlogloss:0.04404
[14]	validation_0-mlogloss:0.03967
[15]	validation_0-mlogloss:0.03493
[16]	validation_0-mlogloss:0.03158
[17]	validation_0-mlogloss:0.02955
[18]	validation_0-mlogloss:0.02687
[19]	validation_0-mlogloss:0.02475
[20]	validation_0-mlogloss:0.02377
[21]	validation_0-mlogloss:0.02282
[22]	validation_0-mlogloss:0.02245
[23]	validation_0-mlogloss:0.02202
[24]	validation_0-mlogloss:0.02155
[25]	validation_0-mlogloss:0.02062
[26]	validation_0-mlogloss:0.02025
[27]	validation_0-mlogloss:0.02