In [8]:
import numpy as np

#Numpy implementation requires manual gradient implementation

#f = w * x
X = np.array([1,2,3,4], dtype=np.float32)
y = np.array([2,4,6,8], dtype=np.float32)
w = 0.0

#model prediction
def forward(X):
    return w * X

#loss
def loss(y,y_pred):
    return ((y - y_pred) ** 2).mean()

#gradients
#MSE = 1/N * (w*x - y)**2
# dJ/dw = 1/N * 2*x * (w*x - y)
def gradient(x,y,y_pred):
    return np.dot(2*x, y_pred-y).mean()

print(f'Pred before training: f() = {forward(5):.3f}')

#Training
lr = 0.01
n_iters = 20

for epoch in range(n_iters):
    #pred: forward pass
    y_pred = forward(X)
    l = loss(y,y_pred)
    
    #gradients
    dw = gradient(X,y,y_pred)
    
    # update weights
    w -= (lr * dw)
    
    if epoch % 1 == 0:
        print(f'epoch {epoch + 1}:\t w={w:.3f} \t loss={l:.8f}')

print(f'Pred after training: f() = {forward(5):.3f}') 

Pred before training: f() = 0.000
epoch 1:	 w=1.200 	 loss=30.00000000
epoch 2:	 w=1.680 	 loss=4.79999924
epoch 3:	 w=1.872 	 loss=0.76800019
epoch 4:	 w=1.949 	 loss=0.12288000
epoch 5:	 w=1.980 	 loss=0.01966083
epoch 6:	 w=1.992 	 loss=0.00314574
epoch 7:	 w=1.997 	 loss=0.00050331
epoch 8:	 w=1.999 	 loss=0.00008053
epoch 9:	 w=1.999 	 loss=0.00001288
epoch 10:	 w=2.000 	 loss=0.00000206
epoch 11:	 w=2.000 	 loss=0.00000033
epoch 12:	 w=2.000 	 loss=0.00000005
epoch 13:	 w=2.000 	 loss=0.00000001
epoch 14:	 w=2.000 	 loss=0.00000000
epoch 15:	 w=2.000 	 loss=0.00000000
epoch 16:	 w=2.000 	 loss=0.00000000
epoch 17:	 w=2.000 	 loss=0.00000000
epoch 18:	 w=2.000 	 loss=0.00000000
epoch 19:	 w=2.000 	 loss=0.00000000
epoch 20:	 w=2.000 	 loss=0.00000000
Pred after training: f() = 10.000


In [6]:
print(w)

1.9999999952316283


In [15]:
#Torch implementaiton
import torch
X = torch.tensor([1,2,3,4], dtype=torch.float32)
y = torch.tensor([2,4,6,8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

#Resuse the same forward and backward functions

n_iters = 100

for epoch in range(n_iters):
    #pred
    y_pred = forward(X)
    #loss
    l = loss(y,y_pred)
    # gradients
    l.backward()
    
    #Remember to update weights with no grads to avoid the auto gradient computation graph from considering it
    with torch.no_grad():
        w -= lr * w.grad
    
    #Remember to empty gradients to avoid gradient accumilation for next iteration
    w.grad.zero_()
    
    if epoch % 10 == 0:
        print(f'epoch {epoch + 1}:\t w={w:.3f} \t loss={l:.8f}')

print(f'Pred after training: f() = {forward(5):.3f}')

epoch 1:	 w=0.300 	 loss=30.00000000
epoch 11:	 w=1.665 	 loss=1.16278565
epoch 21:	 w=1.934 	 loss=0.04506890
epoch 31:	 w=1.987 	 loss=0.00174685
epoch 41:	 w=1.997 	 loss=0.00006770
epoch 51:	 w=1.999 	 loss=0.00000262
epoch 61:	 w=2.000 	 loss=0.00000010
epoch 71:	 w=2.000 	 loss=0.00000000
epoch 81:	 w=2.000 	 loss=0.00000000
epoch 91:	 w=2.000 	 loss=0.00000000
Pred after training: f() = 10.000
