### 第八章 提升方法

In [66]:
import numpy as np
from typing import Tuple


def predict(X: np.ndarray, axis: list, threshold: list, weight: list, direction: list) -> np.ndarray:
    """
    Predict labels with a weighted ensemble of decision stumps (AdaBoost).

    Stump k votes ``direction[k]`` for samples where
    ``X[:, axis[k]] > threshold[k]`` and ``-direction[k]`` otherwise; the votes
    are summed with coefficients ``weight[k]``.

    param: X: np.ndarray - feature matrix, one row per sample
    param: axis: list - feature column index used by each stump
    param: threshold: list - split threshold of each stump
    param: weight: list - ensemble coefficient of each stump
    param: direction: list - vote (+1 or -1) given when the feature exceeds the threshold
    return: output - array with 1 where the weighted vote sum is > 0, else -1
    """
    scores = np.zeros(X.shape[0])
    for feat, cut, alpha, sign in zip(axis, threshold, weight, direction):
        is_above = X[:, feat] > cut
        votes = np.where(is_above, sign, -1 * sign)
        scores += alpha * votes
    # A vote sum of exactly zero (including an empty ensemble) maps to -1.
    positive = scores > 0
    return np.where(positive, 1, -1)

def adaboost(X: np.ndarray, Y: np.ndarray, n_estimators: int = 50) -> Tuple[list, list, list, list]:
    """
    Train an AdaBoost ensemble of decision stumps.

    Every round exhaustively searches all feature columns, both vote
    directions and every distinct feature value for the stump
    ``direction if x[axis] > threshold else -direction`` with the smallest
    weighted training error, appends it to the ensemble, and re-weights the
    samples. Training stops early once the ensemble classifies the whole
    training set correctly.

    param: X: np.ndarray - feature matrix of shape (n_samples, n_features)
    param: Y: np.ndarray - label array of shape (n_samples,); values must be +1 or -1
    param: n_estimators: int - maximum number of boosting rounds (default 50)
    return: axis - feature column index chosen by each stump
    return: threshold - split threshold of each stump
    return: weight - ensemble coefficient of each stump
    return: direction - vote (+1 or -1) each stump gives when the feature exceeds the threshold
    raises: ValueError - if X has no samples or no features
    """
    n, m = X.shape
    if n == 0 or m == 0:
        # Without at least one candidate stump the search below selects nothing.
        raise ValueError("X must contain at least one sample and one feature")

    axis = []
    threshold = []
    weight = []
    direction = []

    wgt = np.ones(n) / n   # sample weights, start uniform
    score = np.zeros(n)    # running weighted vote sum of the ensemble so far
    for i in range(n_estimators):
        loss = np.inf
        for ax in range(m):
            column = X[:, ax]
            unique_val = np.unique(column)
            # Search order (direction, then threshold) and the strict "<" decide
            # which stump wins among equal-error candidates; keep both as they are.
            for di in [1, -1]:
                for val in unique_val:
                    cur_loss = np.sum(wgt * (np.where(column > val, di, -1 * di) != Y))
                    if cur_loss < loss:
                        loss = cur_loss
                        a, t, d = ax, val, di

        # Clip the error away from 0 and 1 so the log below stays finite.
        eps = np.clip(loss, 1e-10, 1 - 1e-10)
        w = 0.5 * np.log(1 / eps - 1)
        stump_pred = np.where(X[:, a] > t, d, -1 * d)

        axis.append(a)
        threshold.append(t)
        weight.append(w)
        direction.append(d)

        # Increase the weight of misclassified samples, decrease the others, renormalise.
        wgt = wgt * np.exp(-1 * w * Y * stump_pred)
        wgt = wgt / np.sum(wgt)

        # Accumulate the ensemble score incrementally instead of re-evaluating
        # every stump each round; the summation order matches a full re-evaluation.
        score += w * stump_pred
        if np.sum(np.where(score > 0, 1, -1) != Y) == 0:
            print(f"在第{i+1}步训练集完全分类正确，提前终止迭代")
            break
    return axis, threshold, weight, direction

# Toy training set: 10 samples (rows) with 3 integer-valued features (columns).
X = np.array([
    [0, 1, 3],
    [0, 3, 1],
    [1, 2, 2],
    [1, 1, 3],
    [1, 2, 3],
    [0, 1, 2],
    [1, 1, 2],
    [1, 1, 1],
    [1, 3, 1],
    [0, 2, 1]
])
# One label per row of X, encoded as +1 / -1 as adaboost() requires.
Y = np.array([-1, -1, -1, -1, -1, -1, 1, 1, -1, -1])

# Train the stump ensemble: a = feature indices, t = thresholds,
# w = stump weights, d = stump directions.
a,t,w,d = adaboost(X, Y)
# Training-set accuracy; as the last expression of the cell it is displayed by the notebook.
np.mean(predict(X,a,t,w,d)==Y)

在第6步训练集完全分类正确，提前终止迭代


np.float64(1.0)

In [56]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report

# Cross-check of the hand-written implementation using scikit-learn's AdaBoostClassifier.
# X and Y are the arrays defined in the earlier cell.

# 1. Build the model
ada_model = AdaBoostClassifier(
    n_estimators=50,
    learning_rate=1.0,
    random_state=42  # fixed random seed so the results are reproducible
)

# 2. Train the model
y=Y
ada_model.fit(X, y)

# 3. Predict (on the training set first, to check the fit)
y_pred = ada_model.predict(X)

# 4. Evaluate the model
accuracy = accuracy_score(y, y_pred)
class_report = classification_report(y, y_pred)

# 5. Print the results
print("===== AdaBoost 预测结果 =====")
print("真实标签：", y)
print("预测标签：", y_pred)
print(f"\n模型准确率：{accuracy:.2f}")
print("\n分类报告：")
print(class_report)

# 6. Predict a "new" sample (example: physique=1, professional ability=1, development potential=2)
# NOTE: [1, 1, 2] is also a row of the training matrix X, so this is not an unseen sample.
new_sample = np.array([[1, 1, 2]])
new_pred = ada_model.predict(new_sample)
new_pred_proba = ada_model.predict_proba(new_sample)  # predicted class probabilities
print(f"\n新样本 {new_sample[0]} 的预测结果：{new_pred[0]}")
print(f"新样本预测概率（-1类，1类）：{new_pred_proba[0]}")

# 7. Inspect the base-estimator weights (how much each base classifier contributes)
print(f"\n基分类器权重：{ada_model.estimator_weights_[:5]}")  # show only the first 5

===== AdaBoost 预测结果 =====
真实标签： [-1 -1 -1 -1 -1 -1  1  1 -1 -1]
预测标签： [-1 -1 -1 -1 -1 -1  1  1 -1 -1]

模型准确率：1.00

分类报告：
              precision    recall  f1-score   support

          -1       1.00      1.00      1.00         8
           1       1.00      1.00      1.00         2

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10


新样本 [1 1 2] 的预测结果：1
新样本预测概率（-1类，1类）：[0.40276836 0.59723164]

基分类器权重：[1.38629436 1.46633707 0.93430924 1.29928298 1.41955468]
