In [205]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import numpy as np
from joblib import dump , load

# 提取数据集
X , y = load_iris(return_X_y = True)
# 设置测试集跟训练集
train_x , test_x , train_y , test_y = train_test_split(X[:100] , y[:100] , test_size = 0.5,shuffle = True , random_state = 4567)
# 首先查看提取出来的X的形状   查看该数据集是几维（一个样本有几列） 然后根据维度来调整生成权重参数的数量
# test_x

# 设置权重参数  一行四列的随机数组 服从正态分布
theta = np.random.randn(1,4)  # 随机向量
bias = 0   # 设置偏置

# 设置超参数  超参数需要手动设置，根据经验，试验设置
lr = 0.1   # 学习率
epoch = 5000 # 测试时循环的次数

# 前向传播 将自变量x和权重参数theta、偏置bias代入模型 计算出激活函数y_hat
def forward(x , theta , bias):
    # 求线性函数的因变量值
    z = np.dot(theta,x.T) + bias
    # 求预测概率值用Sigmoid激活函数  (0,1)区间
    y_hat = 1 / (1 + np.exp(-z))
    return y_hat

# 求损失函数  将y和y的预测概率值导入计算
def loss_function(y , y_hat):
    e = 1e-8 # 防止y_hat为0
    return -y * np.log(y_hat + e) - (1 - y) * np.log(1 - y_hat + e)

# 计算梯度 将x、y、y的预测概率值代入
def calc_gradient(x,y,y_hat):
    # m取x的最后一维的数量
    m = x.shape[-1]
    # 求平均梯度
    delta_w  = np.dot(y_hat-y,x) / m
    # 计算误差的均值作为偏置的梯度
    delta_b = np.mean(y_hat - y)
    return delta_w , delta_b

# 循环
for i in range(epoch):
    # 正向传播 求预测值
    y_hat = forward(train_x , theta , bias)

    # 计算损失
    loss = np.mean(loss_function(train_y , y_hat))
    #
    if i % 100 == 0 :
        print('step:',i,'loss:',loss)

    #计算梯度下降
    dw , db = calc_gradient(train_x , train_y , y_hat)
    theta = theta - lr * dw
    bias = bias - lr * db


# 测试模型准确率
def predict(x , theta , bias):
    pred = forward(x , theta , bias)[0]
    return 1 if pred > 0.5 else 0

count = 0
total = len(test_x)

for i in range (total):
    x = test_x[i]
    y_true = test_y[i]
    y_pred = predict(x , theta , bias)
    if y_pred == y_true :
        count += 1
accuracy = count / total
print(f'该模型的准确率：{accuracy:1%}')

# # 保存模型中的theta和bias参数
# dump({'theta':theta , 'bias':bias},"iris_model.joblib")


step: 0 loss: 3.3866458155294406
step: 100 loss: 0.003316971840838307
step: 200 loss: 0.0022787352090518995
step: 300 loss: 0.0017466959528460028
step: 400 loss: 0.001421401139964541
step: 500 loss: 0.0012011945780399651
step: 600 loss: 0.0010418642361912196
step: 700 loss: 0.0009210289361479538
step: 800 loss: 0.0008261208205620425
step: 900 loss: 0.0007495275399810472
step: 1000 loss: 0.0006863636612459913
step: 1100 loss: 0.0006333471293855448
step: 1200 loss: 0.0005881897433594657
step: 1300 loss: 0.0005492464655718065
step: 1400 loss: 0.0005153036763266046
step: 1500 loss: 0.000485446056032969
step: 1600 loss: 0.0004589699784568224
step: 1700 loss: 0.0004353254764962151
step: 1800 loss: 0.0004140763426964105
step: 1900 loss: 0.0003948720721365286
step: 2000 loss: 0.0003774277349583216
step: 2100 loss: 0.0003615092780438522
step: 2200 loss: 0.0003469226185014743
step: 2300 loss: 0.0003335054331995033
step: 2400 loss: 0.00032112089649082715
step: 2500 loss: 0.00030965284656012934
st