In [2]:
from sklearn.datasets import load_iris

# Load the iris features and labels as a pair of NumPy arrays
X, y = load_iris(return_X_y=True)

In [4]:
# Dimensions of the feature matrix as (rows, columns)
X.shape

(150, 4)

In [5]:
# Peek at the first row of the feature matrix
X[0]

array([5.1, 3.5, 1.4, 0.2])

In [6]:
# Display the full label vector
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [23]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import joblib
import numpy as np


In [None]:
# Load the iris dataset and pull out the feature matrix and label vector
data = load_iris()
X, y = data.data, data.target

# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split identical from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [17]:
# Model parameters
# A seeded generator makes the random weight initialisation -- and therefore
# the whole training run -- reproducible after Restart Kernel -> Run All.
rng = np.random.default_rng(42)
theta = rng.standard_normal((4, 3))  # weights: 4 input features x 3 classes
bias = np.zeros((1, 3))  # one offset per class, broadcast over all rows
# Hyperparameters
lr = 0.01  # learning rate (gradient-descent step size)
epochs = 3000  # number of training iterations


In [21]:
# 2. Model forward pass
def forward(x, theta, bias):
    """Return softmax class probabilities for a batch of samples.

    Args:
        x: Feature matrix with one sample per row.
        theta: Weight matrix; each column holds the weights of one class.
        bias: Per-class offsets added to every row of the linear scores.

    Returns:
        Array shaped like the linear scores in which every row sums to 1.
    """
    # Linear scores for every (sample, class) pair
    scores = np.dot(x, theta) + bias
    # Subtract each row's maximum before exponentiating so np.exp cannot
    # overflow; softmax is unchanged by a per-row constant shift.
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    unnormalized = np.exp(shifted)
    row_totals = np.sum(unnormalized, axis=1, keepdims=True)
    return unnormalized / row_totals

# 3. Loss function
def loss(y, y_hat):
    """Mean cross-entropy of the predicted probabilities against integer labels.

    Args:
        y: 1-D integer array of true class indices, one per sample.
        y_hat: 2-D array of predicted probabilities, one row per sample and
            one column per class.

    Returns:
        The negative log of the probability assigned to each sample's true
        class, averaged over all samples.
    """
    e = 1e-7
    n_samples = y.shape[0]
    # Only the probability of the correct class contributes to the loss.
    true_class_prob = y_hat[np.arange(n_samples), y]
    # The epsilon must sit inside the log: that is what keeps log(0) from
    # producing -inf when a true class is given zero probability.
    return -np.sum(np.log(true_class_prob + e)) / n_samples

# 4. Gradient computation
def calc_gradient(x,y,y_hat):
    """Gradients of the mean softmax cross-entropy loss.

    Args:
        x: Feature matrix with one sample per row.
        y: Integer class indices, one per sample.
        y_hat: Predicted class probabilities, one row per sample and one
            column per class.

    Returns:
        Tuple ``(delta_theta, delta_bias)``: the gradient with respect to the
        weight matrix, and the gradient with respect to the bias as a single
        row (``keepdims=True``).
    """
    m = x.shape[0]
    # One-hot encode the labels with as many columns as y_hat has classes,
    # rather than assuming exactly three.
    n_classes = y_hat.shape[1]
    # Prediction error, shared by both gradients so it is computed once.
    error = y_hat - np.eye(n_classes)[y]
    # Weight gradient: feature-weighted error averaged over the batch.
    delta_theta = np.dot(x.T, error) / m
    # Bias gradient: per-class error averaged over the batch.
    delta_bias = np.mean(error, axis=0, keepdims=True)
    return delta_theta, delta_bias

# 5. Model training
# Full-batch gradient descent: every iteration uses the whole training set,
# and every 200th iteration prints the loss and the training accuracy.
for i in range(epochs):
    # Forward pass: class probabilities for every training sample
    y_hat = forward(X_train, theta, bias)
    # Loss of the current parameters (measured before this iteration's update)
    loss_val = loss(y_train, y_hat)
    # Gradients of the loss with respect to theta and bias
    delta_theta, delta_bias = calc_gradient(X_train, y_train, y_hat)
    # Gradient-descent step scaled by the learning rate
    theta = theta - lr * delta_theta
    bias = bias - lr * delta_bias

    if i % 200 == 0:
        # Training accuracy: share of samples whose highest-probability class matches the label.
        # y_hat predates the update above, so loss and accuracy describe the same parameters.
        acc = np.mean(np.argmax(y_hat, axis=1) == y_train)  # mean of the boolean match mask
        print(f"epoch: {i}, loss: {loss_val}, acc: {acc}")

epoch: 0, loss: 5.12462716338135, acc: 0.3523809523809524
epoch: 200, loss: 0.5564223840151731, acc: 0.7142857142857143
epoch: 400, loss: 0.47297125277971297, acc: 0.8761904761904762
epoch: 600, loss: 0.42006037439186517, acc: 0.9047619047619048
epoch: 800, loss: 0.38073900995233934, acc: 0.9523809523809523
epoch: 1000, loss: 0.349455041035334, acc: 0.9809523809523809
epoch: 1200, loss: 0.3236700558995372, acc: 0.9904761904761905
epoch: 1400, loss: 0.30195308927788983, acc: 0.9904761904761905
epoch: 1600, loss: 0.2833831169693665, acc: 0.9904761904761905
epoch: 1800, loss: 0.2673167013482051, acc: 0.9904761904761905
epoch: 2000, loss: 0.25328083343023117, acc: 0.9904761904761905
epoch: 2200, loss: 0.24091610825856088, acc: 0.9904761904761905
epoch: 2400, loss: 0.2299431137710929, acc: 0.9904761904761905
epoch: 2600, loss: 0.22014088658688694, acc: 0.9904761904761905
epoch: 2800, loss: 0.21133229500813505, acc: 0.9904761904761905


In [24]:
# Persist the trained parameters, one joblib file per array
joblib.dump(value=theta, filename="theta.pkl")  # weight matrix
joblib.dump(value=bias, filename="bias.pkl")  # bias row

# Confirmation message shown once both files have been written
print("模型参数已保存到文件")

模型参数已保存到文件
