In [9]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import numpy as np

In [10]:
# 1、生成训练数据
X_ori,y_ori = load_iris(return_X_y=True)
X = X_ori[:100]  # 取前100个数据
y = y_ori[:100]  # 取前100个标签(0,1)
# print(X)
# print(y)
# 数据拆分
# 局部样本训练模型（过拟合模型）测试预测不好
# 新样本数据模型表现不好（泛化能力差）
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)

In [12]:
theta = np.random.randn(1,4)
print(theta)
bias = 0
# 学习率
lr = 0.1
# 训练轮数
epochs = 3000

[[-0.55066162  0.32109787 -0.21078265  0.07885826]]


In [13]:
# 2、模型计算函数
def forward(x,theta,bias):
    # 线性运行
    z = np.dot(theta,x.T) + bias
    # sigmoid
    y_hat = 1 / ( 1 + np.exp(-z) )
    return y_hat

# 3、计算损失函数
def loss(y,y_hat):
    e = 1e-8
    return - y * np.log(y_hat + e) - (1 - y) * np.log(1 - y_hat + e)

# 4、计算梯度
def calc_gradient(x,y,y_hat):
    m = x.shape[-1]
    # theta梯度计算
    delta_theta = np.dot((y_hat-y),x) / m
    # bias梯度计算
    delta_bias = np.mean(y_hat-y)
    # 返回梯度
    return delta_theta,delta_bias

# 5、模型训练
for i in range(epochs):
    # 前向计算
    y_hat = forward(X_train,theta,bias)
    # 计算损失
    loss_val = loss(y_train,y_hat)
    # 计算梯度
    delta_theta,delta_bias = calc_gradient(X_train,y_train,y_hat)
    # 更新参数
    theta = theta - lr * delta_theta
    bias = bias - lr * delta_bias
    
    if i % 100 == 0:
        # 计算准确率
        acc = np.mean(np.round(y_hat) == y_train)  # [False,True,...,False] -> [0,1,...,0]
        print(f"epoch: {i}, loss: {np.mean(loss_val)}, acc: {acc}")

epoch: 0, loss: 1.5892957523758209, acc: 0.5285714285714286
epoch: 100, loss: 0.0004998814148641961, acc: 1.0
epoch: 200, loss: 0.00046732984338628014, acc: 1.0
epoch: 300, loss: 0.0004389201911902023, acc: 1.0
epoch: 400, loss: 0.0004138968770589126, acc: 1.0
epoch: 500, loss: 0.0003916804035853248, acc: 1.0
epoch: 600, loss: 0.00037181724271506553, acc: 1.0
epoch: 700, loss: 0.0003539472362678281, acc: 1.0
epoch: 800, loss: 0.00033778064915684165, acc: 1.0
epoch: 900, loss: 0.0003230816902407136, acc: 1.0
epoch: 1000, loss: 0.00030965646570044414, acc: 1.0
epoch: 1100, loss: 0.00029734403115257293, acc: 1.0
epoch: 1200, loss: 0.00028600964909263575, acc: 1.0
epoch: 1300, loss: 0.00027553964138980384, acc: 1.0
epoch: 1400, loss: 0.000265837412483286, acc: 1.0
epoch: 1500, loss: 0.00025682034340615795, acc: 1.0
epoch: 1600, loss: 0.0002484173415720505, acc: 1.0
epoch: 1700, loss: 0.00024056688997719025, acc: 1.0
epoch: 1800, loss: 0.00023321548073007486, acc: 1.0
epoch: 1900, loss: 0.0

In [14]:
np.save('theta.npy',theta)
np.save('bias.npy',bias)


In [15]:
theta_load = np.load('theta.npy')
bias_load = np.load('bias.npy')

In [21]:
# 模型推理
idX = np.random.randint(len(X_test)) #随机选择一个测试样本索引
x = X_test[idX]
y = y_test[idX]
y_predict = np.round(forward(x,theta_load,bias_load))
print(f"y: {y}, predict: {y_predict}")

y: 1, predict: [1.]
