In [1]:
import numpy as np
from typing import Any

In [2]:
class LDA:
    """Two-class Fisher linear discriminant analysis.

    ``fit`` learns a projection direction ``w`` and a decision threshold from
    samples labelled 0 and 1. ``predict`` projects samples onto ``w`` and
    compares each projection against that threshold.
    """

    def __init__(self):
        """Create an unfitted model.

        ``w`` stays ``None`` until ``fit`` is called. ``threshold`` defaults
        to 0.0 and is overwritten by ``fit``.
        """
        self.w = None
        self.threshold = 0.0

    def calc_cov(self, X: np.ndarray) -> np.ndarray:
        """Return the biased covariance matrix of the rows of ``X``.

        Args:
            X (np.ndarray): Data matrix with one sample per row.

        Returns:
            np.ndarray: ``(X - mean).T @ (X - mean) / m`` where ``m`` is the
            number of rows and ``mean`` is the per-column mean.
        """
        X = np.asarray(X)
        centered = X - np.mean(X, axis=0)
        return centered.T @ centered / X.shape[0]

    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        """Fit the model: compute the projection direction and threshold.

        Args:
            X (np.ndarray): Training feature matrix, one sample per row.
            y (np.ndarray): Training labels; only rows labelled 0 or 1 are
                used.

        Returns:
            None: The result is stored in ``self.w`` and ``self.threshold``.

        Raises:
            ValueError: If there is no sample of class 0 or no sample of
                class 1 (the class means would be undefined).
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # 1. Split the samples by class.
        X0 = X[y == 0]
        X1 = X[y == 1]
        if len(X0) == 0 or len(X1) == 0:
            raise ValueError(
                "fit requires at least one sample of class 0 and one of class 1"
            )

        # 2. Within-class scatter Sw: sum of the per-class covariances.
        Sw = self.calc_cov(X0) + self.calc_cov(X1)

        # 3. Class means and their difference (mu0 - mu1).
        mu0 = np.mean(X0, axis=0)
        mu1 = np.mean(X1, axis=0)

        # 4. w = Sw^+ (mu0 - mu1). The pseudo-inverse is SVD-based, so a
        #    singular Sw does not raise.
        self.w = np.linalg.pinv(Sw) @ (mu0 - mu1)

        # 5. Decision threshold: projection of the midpoint between the two
        #    class means. Because w points from class 1 towards class 0,
        #    class-0 samples project above it and class-1 samples below it.
        self.threshold = float(self.w @ (mu0 + mu1) / 2.0)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict class labels for the rows of ``X``.

        Args:
            X (np.ndarray): Feature matrix, one sample per row.

        Returns:
            np.ndarray: Integer labels; 1 where the projection ``X @ w`` is
            below ``self.threshold``, otherwise 0.

        Raises:
            RuntimeError: If called before ``fit`` (``self.w`` is ``None``).
        """
        if self.w is None:
            raise RuntimeError("LDA.predict called before fit")

        # Project every sample in one matrix-vector product.
        projections = np.asarray(X) @ self.w
        return np.where(projections < self.threshold, 1, 0)


In [3]:
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Load the Iris dataset and unpack its feature matrix and label vector.
data = datasets.load_iris()
X, y = data.data, data.target

In [4]:
# Keep only the samples labelled 0 or 1 (drop label 2). Both masks on the
# right-hand side are evaluated before either name is rebound.
X, y = X[y != 2], y[y != 2]

# Hold out 20% of the samples for testing; the seed is fixed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=41,
)
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

(80, 4) (20, 4) (80,) (20,)


In [None]:
from sklearn.metrics import accuracy_score

# Fit the discriminant on the training split.
lda = LDA()
lda.fit(X_train, y_train)

# Predict the held-out split and print the accuracy against its labels.
y_pred = lda.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)