In [1]:
import torch

In [2]:
x = torch.tensor(1.0) # Input value
y = torch.tensor(2.0) # Target value
w = torch.tensor(1.0, requires_grad=True)

### Forward pass

In [3]:
y_hat = w*x # Predicted value
loss = (y_hat-y)**2
print(f"Loss = {loss}")

Loss = 1.0


### Backward pass

In [4]:
# Gradient Computation
loss.backward()
print(w.grad)

tensor(-2.)


# Gradient Descent

In [5]:
import numpy as np

In [6]:
# f = 2*x
X = np.array([1, 2, 3, 4], dtype=np.float32) # Input
# y = np.array([2, 4, 6, 8], dtype=np.float32)
y = 2*X  # Target values
w = 0.0 # Intial Weight

### Forward Pass

In [7]:
def forward(x):
    return w*x

### Calculating loss

In [8]:
def loss(y, y_pred):
    '''
    Calculates MSE loss
    '''
    return ((y_pred-y)**2).mean()

### Calculating Gradient manually

Derivative of loss wrt w is:

$\frac{dL}{dw} = \frac{1}{n} (2x \cdot (y_{\text{predicted}} - y))$


In [9]:
def gradient(x, y, y_predicted):
    '''
    Calculates gradient of MSE loss wrt w
    '''
    return np.dot(2*x, y_pred-y).mean()

In [10]:
print(f"Prediction before training:\nf(5) = {forward(5):.3f}")

Prediction before training:
f(5) = 0.000


### Training

In [11]:
lr = 0.01
n_iters = 20
print(f"Initial Weight = {w}")
for epoch in range(1, n_iters+1):
    # Prediction = fwd pass
    y_pred = forward(X)

    # Loss
    l = loss(y=y, y_pred=y_pred)

    # Gradients
    dw = gradient(X, y, y_pred)

    # Update weights
    w -= lr*dw

    # Print training info at every step
    if epoch%2==0:
        print(f"Epoch {epoch}: w = {w:.3f}, loss={l:.5f}")

print(f"Prediction after training:\nf(5) = {forward(5):.3f}")

Initial Weight = 0.0
Epoch 2: w = 1.680, loss=4.80000
Epoch 4: w = 1.949, loss=0.12288
Epoch 6: w = 1.992, loss=0.00315
Epoch 8: w = 1.999, loss=0.00008
Epoch 10: w = 2.000, loss=0.00000
Epoch 12: w = 2.000, loss=0.00000
Epoch 14: w = 2.000, loss=0.00000
Epoch 16: w = 2.000, loss=0.00000
Epoch 18: w = 2.000, loss=0.00000
Epoch 20: w = 2.000, loss=0.00000
Prediction after training:
f(5) = 10.000


# Calculating gradient using PyTorch

Earlier, we were calculating gradient $\frac{\partial w}{\partial l}$ using the formula. But now, **loss.backward()** autimatically does this for us.




In [12]:
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
y = 2*X

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

In [13]:
def forward(x):
    return w*x


def loss(y, y_pred):
    '''
    Calculates MSE loss
    '''
    return ((y_pred-y)**2).mean()

In PyTorch, operations become part of the computational graph when they involve tensors that have **requires_grad=True**

Updating weights should not be the part of computational graph

In [14]:
print(f"Prediction before training:\nf(5) = {forward(5):.3f}")

lr = 0.01
n_iters = 100

for epoch in range(1, n_iters+1):
    y_pred = forward(X)

    l = loss(y, y_pred)

    # Calculating gradient automatically
    # dl/dw
    l.backward()

    with torch.no_grad():
        w -= lr*w.grad

    # Zero Grads
    w.grad.zero_()

    if epoch%10==0:
        print(f"Epoch {epoch}: w = {w:.3f}, loss={l:.5f}")

print(f"Prediction after training:\nf(5) = {forward(5):.3f}")

Prediction before training:
f(5) = 0.000
Epoch 10: w = 1.606, loss=1.60939
Epoch 20: w = 1.922, loss=0.06238
Epoch 30: w = 1.985, loss=0.00242
Epoch 40: w = 1.997, loss=0.00009
Epoch 50: w = 1.999, loss=0.00000
Epoch 60: w = 2.000, loss=0.00000
Epoch 70: w = 2.000, loss=0.00000
Epoch 80: w = 2.000, loss=0.00000
Epoch 90: w = 2.000, loss=0.00000
Epoch 100: w = 2.000, loss=0.00000
Prediction after training:
f(5) = 10.000
