In [121]:
# 调整学习率，样本数据拆分比率，观察训练结果
# 把模型训练参数保存到文件，在另一个代码中加载参数实现预测功能

In [122]:
from sklearn.model_selection import train_test_split
import numpy as np

# 使用sklearn数据集训练逻辑回归模型
from sklearn.datasets import load_iris

In [123]:
# 1. 生成数据集
_test_size = 0.3
X,y = load_iris(return_X_y=True)
X = X[:100] 
y = y[:100]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=_test_size)

In [124]:

# 权重参数
theta = np.random.randn(1, X_train.shape[1]) # shape (1, 4)
bias = 0
# 超参数
learning_rate = 0.01 # 学习率
epochs = 1000 # 迭代次数
print(theta)

[[1.27468205 1.44356589 1.10722481 0.38723828]]


In [125]:
# 2. 模型计算函数
def forward(x, theta, bias):
    # 线性运算
    z = np.dot(theta, x.T) + bias # shape (105,10)
    # sigmoid
    y_hat = 1 / (1 + np.exp(-z))  # shape (105,10)
    return y_hat

In [126]:
# 3. 计算损失函数
def loss(y, y_hat):
    e = 1e-8
    return - y * np.log(y_hat + e) - (1 - y) * np.log(1 - y_hat + e)

In [127]:
# 4. 计算梯度
def calc_gradient(x,y,y_hat):
    # 计算梯度
    m = x.shape[-1]
    # theta梯度计算
    delta_theta = np.dot((y_hat - y), x) / m
    # bias梯度计算
    delta_bias = np.mean(y_hat - y)
    # 返回梯度
    return delta_theta, delta_bias

In [128]:
# 5. 模型训练
for i in range(epochs):
    # 前向计算
    y_hat = forward(X_train, theta, bias)
    # 计算损失
    loss_val = loss(y_train, y_hat)
    # 计算梯度
    delta_theta, delta_bias = calc_gradient(X_train, y_train, y_hat)
    # 更新参数
    theta = theta - learning_rate * delta_theta
    bias = bias - learning_rate * delta_bias

    if i % 100 == 0:
        # 计算准确率
        acc = np.mean(np.round(y_hat) == y_train)  # [False,True,...,False] -> [0,1,...,0]
        print(f"epoch: {i}, loss: {np.mean(loss_val)}, acc: {acc}")

epoch: 0, loss: 5.8754448828590045, acc: 0.5571428571428572
epoch: 100, loss: 0.03821644815094121, acc: 1.0
epoch: 200, loss: 0.020177365411289134, acc: 1.0
epoch: 300, loss: 0.013888806091449518, acc: 1.0
epoch: 400, loss: 0.010660409794722004, acc: 1.0
epoch: 500, loss: 0.008685144727462938, acc: 1.0
epoch: 600, loss: 0.007347404408966634, acc: 1.0
epoch: 700, loss: 0.006379113556551473, acc: 1.0
epoch: 800, loss: 0.005644494529481843, acc: 1.0
epoch: 900, loss: 0.005067284346749469, acc: 1.0


In [129]:
# 6. 保存模型参数 theta 和 bias到一个文件中
dict_data = {'key1': 'value1', 'key2': 'value2'}

# 使用 np.savez 保存多个参数
np.savez('data.npz', theta=theta, bias=bias, dict_data=dict_data)