In [102]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np

X,y = load_iris(return_X_y=True)
# 取前一百条数据作为样本
X = X[:100]
y = y[:100]

#数据拆分
X_train,X_test,y_train,y_test = train_test_split(X,y, test_size=0.2)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(80, 4) (20, 4) (80,) (20,)


In [103]:
# 权重参数
theta = np.random.randn(1,4)
bias = 0
# 超参数
lr = 0.1 # 学习率
epochs = 1000 # 训练次数
print(f"theta: {theta}, bias: {bias}, lr: {lr}, epochs: {epochs}")

theta: [[ 2.02263828  0.953709   -0.48044643 -0.93901783]], bias: 0, lr: 0.1, epochs: 1000


In [104]:
def forward(X, theta, bias):
    # 线性运算
    z = np.dot(theta, X.T) + bias
    # sigmoid函数
    y_hat = 1 / (1 + np.exp(-z))
    return y_hat

# sigmoid损失函数
def loss(y, y_hat):
    e = 1e-8
    return - y * np.log(y_hat + e) - (1 - y) * np.log(1 - y_hat + e)

# 计算梯度
def cal_gradient(x, y, y_hat):
    m = x.shape[-1]
    delta_theta = np.dot((y_hat - y), x) / m
    delta_bais = np.mean(y_hat - y)
    return delta_theta,delta_bais


In [105]:
# 模型训练
for i in range(epochs):
    #前向计算
    y_hat = forward(X_train, theta, bias)
    # 损失计算
    loss_val = loss(y_train, y_hat)
    # 计算梯度
    delta_theta, delta_bias = cal_gradient(X_train, y_train, y_hat)
    # 更新参数
    theta = theta - lr * delta_theta
    bias = bias - lr * delta_bias

    # 间隔5步输出一次
    if i % 5 == 0:
        # 计算准确率
        acc = np.mean(np.round(y_hat) == y_train)
        print(f"epoch: {i}, loss: {np.mean(loss_val)}, acc: {acc}")

# 将参数保存到npy文件
iris_model_params = {'theta': [], 'bias': 0}
iris_model_params['theta'] = theta
iris_model_params['bias'] = bias
np.save("鸢尾花模型参数.npy", iris_model_params)

epoch: 0, loss: 6.860273370691426, acc: 0.45
epoch: 5, loss: 8.288928306619354, acc: 0.55
epoch: 10, loss: 2.451047923893394, acc: 0.4625
epoch: 15, loss: 0.001331210718621003, acc: 1.0
epoch: 20, loss: 0.0007763101404630805, acc: 1.0
epoch: 25, loss: 0.0005494367578342199, acc: 1.0
epoch: 30, loss: 0.00042563046887686906, acc: 1.0
epoch: 35, loss: 0.0003475447684446924, acc: 1.0
epoch: 40, loss: 0.0002937609093393693, acc: 1.0
epoch: 45, loss: 0.0002544444410784117, acc: 1.0
epoch: 50, loss: 0.00022444144703156476, acc: 1.0
epoch: 55, loss: 0.00020078839611039758, acc: 1.0
epoch: 60, loss: 0.0001816595799951801, acc: 1.0
epoch: 65, loss: 0.00016586854922353412, acc: 1.0
epoch: 70, loss: 0.00015261068638000502, acc: 1.0
epoch: 75, loss: 0.00014132095575392382, acc: 1.0
epoch: 80, loss: 0.00013159086246541536, acc: 1.0
epoch: 85, loss: 0.0001231177250118173, acc: 1.0
epoch: 90, loss: 0.00011567248063698778, acc: 1.0
epoch: 95, loss: 0.00010907857534487842, acc: 1.0
epoch: 100, loss: 0.0

In [108]:
# 模型推理
idx = np.random.randint(len(X_test))
x = X_test[idx]
y = y_test[idx]

predict = np.round(forward(x, theta, bias))
print(f"y: {y}, predict: {predict}")

y: 1, predict: [1.]


In [None]:
# 加载鸢尾花模型参数进行预测
iris_model_params = np.load("鸢尾花模型参数.npy", allow_pickle=True).item()
theta = iris_model_params['theta']
bias = iris_model_params['bias']
print(f"theta: {theta}, bias: {bias}")

# 模型预测
X,y = load_iris(return_X_y=True)
X_new = X[:100]
y_new = y[:100]

# 计算准确率
acc = np.mean(np.round(forward(X_new, theta, bias)) == y_new)
print(f"acc: {acc}")

theta: [[-2.47106909 -8.93222062 11.7197772   4.4029588 ]], bias: -0.09244774592391142
[[2.61595349e-12 3.73139664e-10 3.17453024e-11 1.03486801e-09
  1.37096312e-12 2.84039918e-12 3.41472188e-11 2.64150876e-11
  3.13601884e-09 3.17479282e-10 6.74247194e-13 1.39787985e-10
  3.07588796e-10 3.14489375e-11 5.11488496e-16 1.49229230e-15
  2.61484260e-14 4.06301185e-12 2.12878214e-12 8.99636998e-13
  1.02459044e-10 3.41355411e-12 3.39125631e-14 1.96823715e-09
  4.70348988e-09 3.03753409e-09 2.05717697e-10 6.59623785e-12
  4.99153664e-12 1.06814408e-09 2.03814033e-09 2.37148034e-11
  1.99778966e-14 1.87461229e-15 4.93098248e-10 4.68537666e-12
  3.01562887e-13 1.13012080e-12 3.97625103e-10 2.06316940e-11
  1.61132001e-12 2.50509661e-07 6.66240409e-11 2.03135760e-10
  1.51781628e-10 7.42004696e-10 1.86996162e-12 1.31213975e-10
  8.63249459e-13 1.99889513e-11 9.99999767e-01 9.99999646e-01
  9.99999995e-01 9.99999990e-01 9.99999996e-01 9.99999996e-01
  9.99999958e-01 9.99920542e-01 9.99999970e-0