下面将分四个步骤，逐步对比同一个算法（单权重线性模型 f = w * x 的梯度下降训练）的不同实现方式：

1、manually,用numpy手动实现：预测、梯度计算、loss、参数更新

2、auto gradient：梯度改由 PyTorch 的 autograd 自动计算，预测、loss、参数更新仍然手动实现

3、用pytorch实现loss和optimizer

4、全部用pytorch实现

# part 1

In [2]:
import numpy as np

In [2]:
# the model has only a weight, no bias term
# f = w * x

#target function
# f = 2 * x

In [28]:
# Training data sampled from the target function f(x) = 2 * x.
x = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
y = np.array([2.0, 4.0, 6.0, 8.0], dtype=np.float32)

# Initial value of the single trainable weight.
w = 0.0

# Model prediction: a linear model with one weight and no bias.
def forward(x):
    """Return the model output ``w * x`` using the module-level weight ``w``."""
    prediction = w * x
    return prediction

# Loss function: mean squared error (MSE).
def loss(y, y_hat):
    """Return the mean of the squared differences between ``y_hat`` and ``y``."""
    squared_error = (y_hat - y) ** 2
    return np.mean(squared_error)

# Gradient of the loss with respect to w.
# MSE:        J     = 1/N * sum((w*x - y)**2)
# Derivative: dJ/dw = 1/N * sum(2*x * (w*x - y))
def gradient(x, y, y_hat):
    """Return the dot product of ``2*x`` with the residual ``y_hat - y``.

    NOTE: this reproduces the code from the video, which is actually wrong:
    it never averages over the samples. For 1-D inputs ``np.dot`` already
    yields a scalar, so the trailing ``.mean()`` changes nothing. It converges
    faster only because the step is larger; with the 4 samples used here it is
    equivalent to a 4x learning rate.
    """
    residual = y_hat - y
    summed = np.dot(2 * x, residual)
    return summed.mean()

# Baseline prediction made with the initial weight, before any update.
print(f'Prediction before trainingL f(5) = {forward(5):.3f}')

# Gradient-descent hyperparameters: learning rate and number of epochs.
lr, n_iters = 0.01, 10

for epoch in range(n_iters):
    # Forward pass with the current weight.
    y_pred = forward(x)

    # Loss of the current predictions (only reported, not used by the update).
    l = loss(y, y_pred)

    # Hand-written gradient of the loss with respect to w.
    dw = gradient(x, y, y_pred)

    # Gradient-descent step.
    w = w - lr * dw

    # A modulus of 1 matches every epoch index, so every epoch is logged.
    if not epoch % 1:
        print(f'epoch {epoch+1}: w = {w:.4f}, dw = {dw:.4f}, loss = {l:.8f}')

print(f'Prediction after trainingL f(5) = {forward(5):.4f}')



Prediction before trainingL f(5) = 0.000
epoch 1: w = 1.2000, dw = -120.0000, loss = 30.00000000
epoch 2: w = 1.6800, dw = -48.0000, loss = 4.79999924
epoch 3: w = 1.8720, dw = -19.2000, loss = 0.76800019
epoch 4: w = 1.9488, dw = -7.6800, loss = 0.12288000
epoch 5: w = 1.9795, dw = -3.0720, loss = 0.01966083
epoch 6: w = 1.9918, dw = -1.2288, loss = 0.00314570
epoch 7: w = 1.9967, dw = -0.4915, loss = 0.00050332
epoch 8: w = 1.9987, dw = -0.1966, loss = 0.00008053
epoch 9: w = 1.9995, dw = -0.0786, loss = 0.00001288
epoch 10: w = 1.9998, dw = -0.0315, loss = 0.00000206
Prediction after trainingL f(5) = 9.9990


In [34]:
# Inputs and targets; the targets follow f(x) = 2 * x.
x = np.array([1, 2, 3, 4], dtype=np.float32)
y = np.array([2, 4, 6, 8], dtype=np.float32)

# The weight starts at zero and is learned by gradient descent.
w = 0.0

# Model prediction: scale the input by the module-level weight.
def forward(x):
    """Return ``w * x``, where ``w`` is read from module scope at call time."""
    scaled = w * x
    return scaled

# Loss function: mean squared error (MSE).
def loss(y, y_hat):
    """Return the average squared residual between predictions and targets."""
    diff = y_hat - y
    return np.mean(diff * diff)

# Gradient of the loss with respect to w.
# MSE:        J     = 1/N * sum((w*x - y)**2)
# Derivative: dJ/dw = 1/N * sum(2*x * (w*x - y))
def gradient(x, y, y_hat):
    """Return the mean over samples of ``2*x * (y_hat - y)``."""
    per_sample = (2 * x) * (y_hat - y)
    return np.mean(per_sample)

# Report the prediction made with the initial weight.
print(f'Prediction before trainingL f(5) = {forward(5):.3f}')

# Training configuration.
lr = 0.01      # learning rate
n_iters = 10   # number of epochs

for epoch in range(n_iters):
    # Forward pass, then loss and gradient for the current weight.
    y_pred = forward(x)
    l = loss(y, y_pred)
    dw = gradient(x, y, y_pred)

    # Move the weight against the gradient.
    w -= lr * dw

    # Logging interval of 1: every epoch is printed.
    if not epoch % 1:
        print(f'epoch {epoch+1}: w = {w:.4f}, dw = {dw:.4f}, loss = {l:.8f}')

# Report the prediction made with the trained weight.
print(f'Prediction after trainingL f(5) = {forward(5):.4f}')



Prediction before trainingL f(5) = 0.000
epoch 1: w = 0.3000, dw = -30.0000, loss = 30.00000000
epoch 2: w = 0.5550, dw = -25.5000, loss = 21.67499924
epoch 3: w = 0.7717, dw = -21.6750, loss = 15.66018677
epoch 4: w = 0.9560, dw = -18.4238, loss = 11.31448555
epoch 5: w = 1.1126, dw = -15.6602, loss = 8.17471600
epoch 6: w = 1.2457, dw = -13.3112, loss = 5.90623236
epoch 7: w = 1.3588, dw = -11.3145, loss = 4.26725292
epoch 8: w = 1.4550, dw = -9.6173, loss = 3.08308983
epoch 9: w = 1.5368, dw = -8.1747, loss = 2.22753215
epoch 10: w = 1.6063, dw = -6.9485, loss = 1.60939264
Prediction after trainingL f(5) = 8.0313


In [22]:
# Same data as the training cells, with all-zero predictions (i.e. w = 0).
x = np.array([1, 2, 3, 4], dtype=np.float32)
y = np.array([2, 4, 6, 8], dtype=np.float32)
y_hat = np.zeros(4, dtype=np.float32)

# a: the dot product sums the per-sample terms; the mean of that scalar changes nothing.
a = np.dot(2 * x, y_hat - y).mean()
# b: element-wise product averaged over the samples.
b = (2 * x * (y_hat - y)).mean()
a, b


(-120.0, -30.0)

# part 2 

用 PyTorch 的 autograd 自动计算梯度（预测、loss、参数更新仍然手动实现）

In [3]:
import torch

In [4]:
# Converges exactly like the NumPy version above that uses the correct
# (averaged) gradient.

# Inputs and targets; the targets follow f(x) = 2 * x.
x = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
y = torch.tensor([2.0, 4.0, 6.0, 8.0], dtype=torch.float32)

# Scalar weight starting at zero; requires_grad=True lets autograd track it.
w = torch.zeros((), dtype=torch.float32, requires_grad=True)

# Model prediction: multiply the input by the module-level weight tensor.
def forward(x):
    """Return ``w * x`` using the module-level weight ``w``."""
    output = w * x
    return output

# Loss function: mean squared error (MSE).
def loss(y, y_hat):
    """Return the mean of the squared differences between ``y_hat`` and ``y``."""
    squared_error = (y_hat - y) ** 2
    return squared_error.mean()

# Gradient: supplied by PyTorch autograd, so no hand-written gradient
# function is defined in this cell.

# Report the prediction made with the initial weight.
print(f'Prediction before trainingL f(5) = {forward(5):.3f}')

# Training configuration: learning rate and number of epochs.
lr, n_iters = 0.01, 100

for epoch in range(n_iters):
    # Forward pass and loss for the current weight.
    y_pred = forward(x)
    l = loss(y, y_pred)

    # Backward pass: autograd accumulates dl/dw into w.grad.
    l.backward()

    # Update the weight in place with gradient tracking disabled, so the
    # update is not recorded by autograd.
    with torch.no_grad():
        w.sub_(lr * w.grad)

    # Reset the gradient; otherwise the next backward() would add to it.
    w.grad.zero_()

    # Log every 10th epoch.
    if not epoch % 10:
        print(f'epoch {epoch+1}: w = {w:.4f}, loss = {l:.8f}')

# Report the prediction made with the trained weight.
print(f'Prediction after trainingL f(5) = {forward(5):.4f}')



Prediction before trainingL f(5) = 0.000
epoch 1: w = 0.3000, loss = 30.00000000
epoch 11: w = 1.6653, loss = 1.16278565
epoch 21: w = 1.9341, loss = 0.04506890
epoch 31: w = 1.9870, loss = 0.00174685
epoch 41: w = 1.9974, loss = 0.00006770
epoch 51: w = 1.9995, loss = 0.00000262
epoch 61: w = 1.9999, loss = 0.00000010
epoch 71: w = 2.0000, loss = 0.00000000
epoch 81: w = 2.0000, loss = 0.00000000
epoch 91: w = 2.0000, loss = 0.00000000
Prediction after trainingL f(5) = 10.0000
