In [3]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [4]:
# Load the Iris dataset and pull out the feature matrix and the target vector
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 30% of the samples as a test set; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [5]:
# Softmax regression forward pass
def softmax_regression(X, w):
    """Return row-wise softmax probabilities of the linear scores ``X @ w``.

    Args:
        X: 2-D array of inputs, one sample per row.
        w: 2-D weight matrix whose row count matches the columns of ``X``.

    Returns:
        Array with the same shape as ``X @ w``; every row sums to 1.
    """
    logits = np.matmul(X, w)
    # Shift each row by its own maximum before exponentiating to avoid overflow;
    # softmax is invariant to a per-row constant shift.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    unnormalized = np.exp(shifted)
    return unnormalized / np.sum(unnormalized, axis=1, keepdims=True)

# Derivative of the cross-entropy loss
def dCrossEntropy(X_train, y_train, y_pred):
    """Return ``X_train.T @ (y_pred - y_train)`` averaged over the samples.

    For softmax outputs and one-hot targets this is the gradient of the mean
    cross-entropy loss with respect to the weight matrix.

    Args:
        X_train: 2-D array of inputs, one sample per row.
        y_train: Target array with the same shape as ``y_pred``.
        y_pred: Predicted probabilities, one row per sample.

    Returns:
        Array of shape (columns of ``X_train``, columns of ``y_pred``).
    """
    residual = y_pred - y_train
    summed_grad = X_train.T @ residual
    # Divide by the number of samples so the gradient is a per-sample average
    return summed_grad / len(X_train)

# One-hot encoding of integer class labels
def onehot(y, num_classes=3):
    """One-hot encode integer class labels.

    Args:
        y: Integer label or array of integer labels, each in
            ``[0, num_classes)``.
        num_classes: Width of the encoding. Defaults to 3, which was the
            previously hard-coded value, so existing calls are unaffected.

    Returns:
        Float array in which each label is replaced by the matching row of the
        ``num_classes`` x ``num_classes`` identity matrix.

    Raises:
        IndexError: If a label is outside ``[-num_classes, num_classes)``.
    """
    # Indexing the identity matrix by label selects the matching unit vector
    return np.eye(num_classes)[y]

In [6]:
# One-hot encode the training labels once: the encoding does not change between
# iterations, so it is hoisted out of the training loop.
y_train_onehot = onehot(y_train)

# Initialise the weights to zeros, one row per feature and one column per class.
# The shape is taken from the data rather than typed out by hand.
w = np.zeros((X_train.shape[1], y_train_onehot.shape[1]))

# Full-batch gradient descent
alpha = 0.01  # learning rate
n_iterations = 1000
for _ in range(n_iterations):
    train_proba = softmax_regression(X_train, w)
    grad = dCrossEntropy(X_train, y_train_onehot, train_proba)
    w = w - alpha * grad

print(w)

# Predict on the test set: the class with the highest probability wins
y_pred = softmax_regression(X_test, w).argmax(axis=1)

print('准确率:', round(accuracy_score(y_test, y_pred), 2))

# Classify a single hand-written example
X_exam = np.array([
    [4.8, 3.0, 1.1, 0.1]
])

# [0] unwraps the single prediction so it can index the class-name array
y_exam = softmax_regression(X_exam, w).argmax(axis=1)[0]
print('示例数据结果:', iris.target_names[y_exam])

[[ 0.33271293  0.2148396  -0.54755253]
 [ 0.83325269 -0.27878767 -0.55446503]
 [-1.18629723  0.16136112  1.02493611]
 [-0.53740959 -0.18095203  0.71836162]]
准确率: 1.0
示例数据结果: setosa
