In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [9]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [13]:
X,y = load_iris(return_X_y=True)
X = X[:100]
y = y[:100]
X.shape

(100, 4)

In [22]:
def train_step(bias=0.0, ratio = 0.2, lr=0.01, epochs = 1000):
    # 数据拆分
    train_x, test_x, train_y, test_y = train_test_split(X,y,test_size=ratio)
    theta = np.random.randn(1,4)
    print(f"theta= {theta} before training")
    # training step
    def forward (x, theta, bias):
        z = np.dot(theta, x.T) + bias
        y_hat = 1/(1 + np.exp(-z)) # sigmoid
        return y_hat

    def loss(y, y_hat):
        epsilon = 1e-8 # avoid np.log(0)
        return -y * np.log(y_hat + epsilon) - (1 - y)*np.log(1 - y_hat + epsilon)

    def gradient(x,y,y_hat):
        delta_theta = np.dot((y_hat - y), x)/ x.shape[-1] # feature
        delta_bias = np.mean(y_hat - y)
        return delta_theta, delta_bias

    for i in range(epochs):
        y_hat = forward(train_x,theta,bias)
        loss_value = loss(train_y,y_hat)
        delta_theta, delta_bias = gradient(train_x, train_y,y_hat)
        # update
        theta = theta - lr * delta_theta
        bias = bias - lr * delta_bias
        acc = np.mean(np.round(y_hat) == train_y)
        print(f"epoch: {i}, loss: {np.mean(loss_value)},acc:{acc}")
    
    # 保存模型参数
    save_path = f"ratio_{ratio}_lr_{lr}.npz"
    np.savez(save_path, theta=theta, bias=bias,test_x= test_x,test_y = test_y)

In [23]:
train_step() # ratio = 0.2, lr=0.01, epochs = 1000

theta= [[2.26629735 0.7183799  2.9241312  2.23839773]] before training
epoch: 0, loss: 9.002473899474754,acc:0.4875
epoch: 1, loss: 7.446242995692032,acc:0.4875
epoch: 2, loss: 5.439386181331282,acc:0.4875
epoch: 3, loss: 3.415125328565671,acc:0.4875
epoch: 4, loss: 1.4290697811626252,acc:0.4875
epoch: 5, loss: 0.1819811774860751,acc:0.975
epoch: 6, loss: 0.07009156942386625,acc:1.0
epoch: 7, loss: 0.04697759446480866,acc:1.0
epoch: 8, loss: 0.036819211136566735,acc:1.0
epoch: 9, loss: 0.0313130274524168,acc:1.0
epoch: 10, loss: 0.02800905358563307,acc:1.0
epoch: 11, loss: 0.025904812803657806,acc:1.0
epoch: 12, loss: 0.02450972212362973,acc:1.0
epoch: 13, loss: 0.023555303840676226,acc:1.0
epoch: 14, loss: 0.022883486593433262,acc:1.0
epoch: 15, loss: 0.02239660193555144,acc:1.0
epoch: 16, loss: 0.0220323624702168,acc:1.0
epoch: 17, loss: 0.021750221710589334,acc:1.0
epoch: 18, loss: 0.021523436652208162,acc:1.0
epoch: 19, loss: 0.021334220548671005,acc:1.0
epoch: 20, loss: 0.02117067

In [25]:
train_step(ratio=0.4, lr= 0.01,epochs=1000) # ratio = 0.4, lr=0.01, epochs = 1000

theta= [[ 0.01795739 -0.71732467 -1.77420578 -0.01536885]] before training
epoch: 0, loss: 5.12777285538375,acc:0.4666666666666667
epoch: 1, loss: 2.537386953277863,acc:0.4666666666666667
epoch: 2, loss: 1.323168392622417,acc:0.2
epoch: 3, loss: 1.1892895088130109,acc:0.21666666666666667
epoch: 4, loss: 1.067533599823079,acc:0.26666666666666666
epoch: 5, loss: 0.9582351349177831,acc:0.3
epoch: 6, loss: 0.8611828040774598,acc:0.4166666666666667
epoch: 7, loss: 0.775758941052722,acc:0.48333333333333334
epoch: 8, loss: 0.7010270624090758,acc:0.5166666666666667
epoch: 9, loss: 0.6358940094981147,acc:0.5666666666666667
epoch: 10, loss: 0.5792111679431533,acc:0.6666666666666666
epoch: 11, loss: 0.529869003454423,acc:0.8166666666666667
epoch: 12, loss: 0.4868429051885858,acc:0.9166666666666666
epoch: 13, loss: 0.44922166975962957,acc:0.95
epoch: 14, loss: 0.41621210952037563,acc:0.9833333333333333
epoch: 15, loss: 0.38713527667614217,acc:0.9833333333333333
epoch: 16, loss: 0.3614153871933115,

In [26]:
train_step(ratio=0.2, lr= 0.1,epochs=1000) # ratio = 0.2, lr=0.1, epochs = 1000

theta= [[-0.08156465 -0.95508915  1.59503591 -1.17899525]] before training
epoch: 0, loss: 0.15304163513208463,acc:1.0
epoch: 1, loss: 0.104306603587308,acc:1.0
epoch: 2, loss: 0.23401381345753097,acc:0.925
epoch: 3, loss: 5.495030475141637,acc:0.475
epoch: 4, loss: 8.749822324513474,acc:0.525
epoch: 5, loss: 0.052662933505490374,acc:1.0
epoch: 6, loss: 0.0023363496136367306,acc:1.0
epoch: 7, loss: 0.0021663603098767644,acc:1.0
epoch: 8, loss: 0.0020459470449796778,acc:1.0
epoch: 9, loss: 0.001958627213409165,acc:1.0
epoch: 10, loss: 0.0018940885913367774,acc:1.0
epoch: 11, loss: 0.0018455689569549954,acc:1.0
epoch: 12, loss: 0.0018084811638149714,acc:1.0
epoch: 13, loss: 0.0017796368511704755,acc:1.0
epoch: 14, loss: 0.0017567811441901813,acc:1.0
epoch: 15, loss: 0.0017382997468488911,acc:1.0
epoch: 16, loss: 0.001723027087169688,acc:1.0
epoch: 17, loss: 0.0017101166389746642,acc:1.0
epoch: 18, loss: 0.0016989511244543038,acc:1.0
epoch: 19, loss: 0.0016890792001638324,acc:1.0
epoch: 2

In [27]:
train_step(ratio=0.4, lr= 0.1,epochs=1000) # ratio = 0.4, lr=0.1, epochs = 1000

theta= [[-0.21445834  0.55821874 -0.1471407  -0.43511832]] before training
epoch: 0, loss: 1.1225269903971957,acc:0.016666666666666666
epoch: 1, loss: 3.3683727378104256,acc:0.5
epoch: 2, loss: 9.178330280561237,acc:0.5
epoch: 3, loss: 5.8969641787530565,acc:0.5
epoch: 4, loss: 4.963323078860665,acc:0.5
epoch: 5, loss: 8.252136170134229,acc:0.5
epoch: 6, loss: 0.057576797082524236,acc:0.9833333333333333
epoch: 7, loss: 0.0014484184949158977,acc:1.0
epoch: 8, loss: 0.001349148401415602,acc:1.0
epoch: 9, loss: 0.0012686732167417436,acc:1.0
epoch: 10, loss: 0.0012024277684050645,acc:1.0
epoch: 11, loss: 0.001147202950698004,acc:1.0
epoch: 12, loss: 0.0011006753863541554,acc:1.0
epoch: 13, loss: 0.0010611208317097352,acc:1.0
epoch: 14, loss: 0.0010272324730663228,acc:1.0
epoch: 15, loss: 0.0009980017848827025,acc:1.0
epoch: 16, loss: 0.00097263811931551,acc:1.0
epoch: 17, loss: 0.0009505130559110509,acc:1.0
epoch: 18, loss: 0.000931121028655385,acc:1.0
epoch: 19, loss: 0.000914050920119889

In [28]:
train_step(ratio=0.2, lr= 0.1,epochs=1000) # ratio = 0.2, lr=0.9, epochs = 1000

theta= [[-0.89547623 -1.10258994  1.43162474 -0.60333177]] before training
epoch: 0, loss: 1.527139721971519,acc:0.5125
epoch: 1, loss: 9.440598757884384,acc:0.4875
epoch: 2, loss: 0.011748432604296071,acc:1.0
epoch: 3, loss: 0.007214279878851271,acc:1.0
epoch: 4, loss: 0.006580994176660126,acc:1.0
epoch: 5, loss: 0.006379004514655959,acc:1.0
epoch: 6, loss: 0.006269547606595571,acc:1.0
epoch: 7, loss: 0.006182711170345727,acc:1.0
epoch: 8, loss: 0.00610274350173442,acc:1.0
epoch: 9, loss: 0.006025962501026952,acc:1.0
epoch: 10, loss: 0.005951444557534341,acc:1.0
epoch: 11, loss: 0.005878910215342216,acc:1.0
epoch: 12, loss: 0.005808235828303371,acc:1.0
epoch: 13, loss: 0.005739338397066153,acc:1.0
epoch: 14, loss: 0.005672147667467297,acc:1.0
epoch: 15, loss: 0.005606599087832777,acc:1.0
epoch: 16, loss: 0.0055426318780303465,acc:1.0
epoch: 17, loss: 0.005480188389468612,acc:1.0
epoch: 18, loss: 0.005419213806365913,acc:1.0
epoch: 19, loss: 0.005359655945995645,acc:1.0
epoch: 20, loss

In [30]:
train_step(ratio=0.4, lr= 0.9,epochs=1000) # ratio = 0.4, lr=0.9, epochs = 1000

theta= [[ 1.13240398 -1.0090983   0.69173311 -0.14531365]] before training
epoch: 0, loss: 1.6056235675748276,acc:0.5
epoch: 1, loss: 9.210340366976185,acc:0.5
epoch: 2, loss: 9.210340366976187,acc:0.5
epoch: 3, loss: 9.210340366976185,acc:0.5
epoch: 4, loss: 9.210340366976187,acc:0.5
epoch: 5, loss: 9.21034036697589,acc:0.5
epoch: 6, loss: 9.210340366976187,acc:0.5
epoch: 7, loss: 0.0002885565556161663,acc:1.0
epoch: 8, loss: 0.00024515894802255525,acc:1.0
epoch: 9, loss: 0.00021338496229936666,acc:1.0
epoch: 10, loss: 0.00018905951484092434,acc:1.0
epoch: 11, loss: 0.0001698090399195472,acc:1.0
epoch: 12, loss: 0.00015417904392684228,acc:1.0
epoch: 13, loss: 0.00014122619852650821,acc:1.0
epoch: 14, loss: 0.00013031081504951046,acc:1.0
epoch: 15, loss: 0.00012098320397052293,acc:1.0
epoch: 16, loss: 0.00011291769794337263,acc:1.0
epoch: 17, loss: 0.00010587247457525831,acc:1.0
epoch: 18, loss: 9.966409763632889e-05,acc:1.0
epoch: 19, loss: 9.415083209207387e-05,acc:1.0
epoch: 20, los

In [31]:
train_step(ratio=0.2, lr= 10,epochs=1000) # ratio = 0.2, lr=20, epochs = 1000

theta= [[-0.52678408 -1.34700188  0.23750588 -1.50934448]] before training
epoch: 0, loss: 4.296730247550523,acc:0.45
epoch: 1, loss: 8.289306329278567,acc:0.55
epoch: 2, loss: 8.289306329278567,acc:0.55
epoch: 3, loss: 10.131374404673803,acc:0.45
epoch: 4, loss: 8.289306329278567,acc:0.55
epoch: 5, loss: -9.999999889225291e-09,acc:1.0
epoch: 6, loss: -9.999999889225291e-09,acc:1.0
epoch: 7, loss: -9.999999889225291e-09,acc:1.0
epoch: 8, loss: -9.999999889225291e-09,acc:1.0
epoch: 9, loss: -9.999999889225291e-09,acc:1.0
epoch: 10, loss: -9.999999889225291e-09,acc:1.0
epoch: 11, loss: -9.999999889225291e-09,acc:1.0
epoch: 12, loss: -9.999999889225291e-09,acc:1.0
epoch: 13, loss: -9.999999889225291e-09,acc:1.0
epoch: 14, loss: -9.999999889225291e-09,acc:1.0
epoch: 15, loss: -9.999999889225291e-09,acc:1.0
epoch: 16, loss: -9.999999889225291e-09,acc:1.0
epoch: 17, loss: -9.999999889225291e-09,acc:1.0
epoch: 18, loss: -9.999999889225291e-09,acc:1.0
epoch: 19, loss: -9.999999889225291e-09,ac

  y_hat = 1/(1 + np.exp(-z)) # sigmoid
