In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
import numpy as np


In [4]:
# 加载数据
iris = load_iris()
X, y = iris.data, iris.target

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# 第一层基学习器
base_models = [
    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
    ('svm', SVC(kernel='linear', probability=True, random_state=42)),
    ('knn', KNeighborsClassifier(n_neighbors=3))
]

# 第二层元学习器
meta_model = LogisticRegression()
# 创建存储基模型预测结果的数组
meta_features = np.zeros((X_train.shape[0], len(base_models)))


In [6]:
# 使用5折交叉验证生成元特征
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for fold, (train_idx, val_idx) in enumerate(kf.split(X_train)):
    # 划分训练集和验证集
    X_train_fold, X_val_fold = X_train[train_idx], X_train[val_idx]
    y_train_fold, y_val_fold = y_train[train_idx], y_train[val_idx]
    
    # 训练基模型并生成预测
    for model_idx, (name, model) in enumerate(base_models):
        model.fit(X_train_fold, y_train_fold)
        preds = model.predict_proba(X_val_fold)  # 使用概率作为特征
        meta_features[val_idx, model_idx] = preds[:, 1]  # 取正类的概率（适用于二分类）

# 训练元模型
meta_model.fit(meta_features, y_train)

# 在测试集上评估
test_meta_features = np.zeros((X_test.shape[0], len(base_models)))
for model_idx, (name, model) in enumerate(base_models):
    model.fit(X_train, y_train)  # 在整个训练集上重新训练
    test_preds = model.predict_proba(X_test)
    test_meta_features[:, model_idx] = test_preds[:, 1]

# 最终预测
final_predictions = meta_model.predict(test_meta_features)
accuracy = np.mean(final_predictions == y_test)
print(f"Stacking模型准确率：{accuracy:.4f}")

Stacking模型准确率：0.7333
