In [5]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np
# 鸢尾花数据集是一个经典且非常简单的多类别分类数据集。默认（150,4）

# 生成预训练数据集
X,y = load_iris(return_X_y=True)
# 数据拆分
X_train, X_test, y_train, y_test = train_test_split(X[:100], y[:100], test_size=0.2)

# 模型训练
# 1. 模型参数初始化
theta = np.random.randn(1, 4)  # 随机初始化theta
bias = np.random.randn(1)  # 随机初始化bias
lr = 0.01  # 学习率
epochs = 4000  # 迭代次数
# 2. 模型计算函数
def forward(X,theta, bias):
    # 线性运算
    z = np.dot(theta, X.T) + bias
    # sigmoid逻辑回归函数
    y_hat = 1 / (1 + np.exp(-z))
    return y_hat
# 3. 计算损失函数
def loss(y,y_hat):
    e = 1e-8
    return - y * np.log(y_hat + e) - (1 - y) * np.log(1 - y_hat + e)
# 4. 计算梯度
def calc_gradient(x,y,y_hat):
    # 计算梯度
    m = x.shape[-1]
    # theta梯度计算
    delta_theta = np.dot((y_hat - y), x) / m
    # bias梯度计算
    delta_bias = np.mean(y_hat - y)
    # 返回梯度
    return delta_theta, delta_bias
# 5. 模型训练
for i in range(epochs):
    # 前向计算
    y_hat = forward(X_train, theta, bias)
    # 计算损失
    loss_val = loss(y_train, y_hat)
    # 计算梯度
    delta_theta, delta_bias = calc_gradient(X_train, y_train, y_hat)
    #更新参数
    theta -= delta_theta * lr
    bias -= delta_bias * lr
    if i % 100 == 0:
        # 计算准确率
        acc = np.mean(np.round(y_hat) == y_train)
        print(f"epoch: {i}, loss: {np.mean(loss_val)}, acc: {acc}")
# 保存模型参数到文件中
np.savez("model.npz", theta=theta, bias=bias)
# 模型预测
# 加载模型参数
model = np.load("model.npz")
theta = model["theta"]
bias = model["bias"]
print(theta, bias)
# 预测
y_hat = forward(X_test, theta, bias)
# 计算准确率
acc = np.mean(np.round(y_hat) == y_test)
print(f"acc: {acc}")

epoch: 0, loss: 4.3359306246061875, acc: 0.5
epoch: 100, loss: 0.03902307433148942, acc: 1.0
epoch: 200, loss: 0.01933540456073914, acc: 1.0
epoch: 300, loss: 0.013068404548774198, acc: 1.0
epoch: 400, loss: 0.009953301695189412, acc: 1.0
epoch: 500, loss: 0.00807837635762217, acc: 1.0
epoch: 600, loss: 0.006820899260634616, acc: 1.0
epoch: 700, loss: 0.005916427993266367, acc: 1.0
epoch: 800, loss: 0.005233180915267098, acc: 1.0
epoch: 900, loss: 0.0046979742343511936, acc: 1.0
epoch: 1000, loss: 0.004266843321108829, acc: 1.0
epoch: 1100, loss: 0.003911749635084408, acc: 1.0
epoch: 1200, loss: 0.003613949497310723, acc: 1.0
epoch: 1300, loss: 0.0033604265160725845, acc: 1.0
epoch: 1400, loss: 0.0031418527824271826, acc: 1.0
epoch: 1500, loss: 0.0029513646694443786, acc: 1.0
epoch: 1600, loss: 0.002783796780623412, acc: 1.0
epoch: 1700, loss: 0.00263518550777955, acc: 1.0
epoch: 1800, loss: 0.0025024374839593957, acc: 1.0
epoch: 1900, loss: 0.002383102296236905, acc: 1.0
epoch: 2000, 