#### 调整学习率，观察模型训练结果

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
# 1. 生成训练数据
X,y = load_iris(return_X_y=True)

X = X[:100]
y = y[:100]

# 数据拆分
# 局部样本训练模型（过拟合模型）测试预测不好
# 新样本数据模型表现不好（泛化能力差）
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


In [3]:

# 超参数
lrs = [0.1, 0.01, 0.001]  # 学习率
epochs = 500  # 训练次数

In [4]:
# 2. 模型计算函数
def forward(x, theta, bias):
    # 线性运算
    z = np.dot(theta, x.T) + bias 
    # sigmoid
    y_hat = 1 / (1 + np.exp(-z))  
    return y_hat

# 3. 计算损失函数
def loss(y, y_hat):
    e = 1e-8
    return - y * np.log(y_hat + e) - (1 - y) * np.log(1 - y_hat + e)

# 4. 计算梯度
def calc_gradient(x,y,y_hat):
    # 计算梯度
    m = x.shape[-1]
    # theta梯度计算
    delta_theta = np.dot((y_hat - y), x) / m
    # bias梯度计算
    delta_bias = np.mean(y_hat - y)
    # 返回梯度
    return delta_theta, delta_bias

# 5. 模型训练
for lr in lrs:
    # 权重参数
    theta = np.random.randn(1,4)  # shape (1, 4)
    bias = 0

    for i in range(epochs):
        # 前向计算
        y_hat = forward(X_train, theta, bias)
        # 计算损失
        loss_val = loss(y_train, y_hat)
        # 计算梯度
        delta_theta, delta_bias = calc_gradient(X_train, y_train, y_hat)
        # 更新参数
        theta = theta - lr * delta_theta
        bias = bias - lr * delta_bias

        if i % 10 == 0:
            # 计算准确率
            acc = np.mean(np.round(y_hat) == y_train)  # [False,True,...,False] -> [0,1,...,0]
            print(f"lr:{lr} epoch: {i}, loss: {np.mean(loss_val)}, acc: {acc}")
    print('*'*50)

lr:0.1 epoch: 0, loss: 6.737876089832445, acc: 0.5
lr:0.1 epoch: 10, loss: 0.0035842303535322403, acc: 1.0
lr:0.1 epoch: 20, loss: 0.003320793191357164, acc: 1.0
lr:0.1 epoch: 30, loss: 0.003099982151563609, acc: 1.0
lr:0.1 epoch: 40, loss: 0.0029081560928105018, acc: 1.0
lr:0.1 epoch: 50, loss: 0.002739870040358993, acc: 1.0
lr:0.1 epoch: 60, loss: 0.002590975880900283, acc: 1.0
lr:0.1 epoch: 70, loss: 0.002458252794282741, acc: 1.0
lr:0.1 epoch: 80, loss: 0.0023391608459611473, acc: 1.0
lr:0.1 epoch: 90, loss: 0.0022316693854515625, acc: 1.0
lr:0.1 epoch: 100, loss: 0.002134134980100507, acc: 1.0
lr:0.1 epoch: 110, loss: 0.0020452129397813858, acc: 1.0
lr:0.1 epoch: 120, loss: 0.001963792102702569, acc: 1.0
lr:0.1 epoch: 130, loss: 0.001888946034557314, acc: 1.0
lr:0.1 epoch: 140, loss: 0.0018198960070110117, acc: 1.0
lr:0.1 epoch: 150, loss: 0.0017559825606817043, acc: 1.0
lr:0.1 epoch: 160, loss: 0.0016966434124230738, acc: 1.0
lr:0.1 epoch: 170, loss: 0.0016413961116984739, acc: 1

In [5]:
# 模型推理
idx = np.random.randint(len(X_test)) # 随机选择一个测试样本索引
x = X_test[idx]
y = y_test[idx]

predict = np.round(forward(x, theta, bias))
print(f"y: {y}, predict: {predict}")

y: 0, predict: [0.]
