# Lab-03 Deeper Look at GD

In [1]:
import numpy as np
import pandas as pd
import torch

## Data

In [2]:
# Toy dataset: three samples as (3, 1) float32 column tensors, with y equal to x.
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)

## Hypothesis

In [3]:
# Single weight of the bias-free linear model, initialised to zero.
W = torch.zeros((1,), requires_grad=True)
# Model prediction for every training sample: H(x) = W * x.
hypothesis = W * x_train

## Cost (MSE)

$$
cost(W) = \mathcal{J}(W)
= \frac{1}{m} \sum_{i=1}^{m} \mathcal{L}(\hat{y}^{(i)}, y^{(i)})
= \frac{1}{m} \sum_{i=1}^{m} (Wx^{(i)} - y^{(i)})^2
$$
$$
\nabla W = \frac{\partial \mathcal{J}}{\partial W}
= \frac{2}{m} \sum_{i=1}^{m}{(Wx^{(i)}-y^{(i)}) x^{(i)}}
$$

$$
W := W - \alpha \nabla W
$$

In [4]:
# Mean squared error between the predictions and the targets.
cost = (hypothesis - y_train).pow(2).mean()

In [5]:
# Show the cost obtained with the initial weight.
cost

tensor(4.6667, grad_fn=<MeanBackward0>)

In [6]:
# One hand-written gradient-descent step: W <- W - lr * dJ/dW,
# where dJ/dW = 2 * mean((W*x - y) * x) for the MSE cost.
lr = 0.1
gradient = 2 * ((W * x_train - y_train) * x_train).mean()
W = W - gradient * lr

In [7]:
# Inspect W after the first manual update step.
W

tensor([0.9333], grad_fn=<SubBackward0>)

In [8]:
# Repeat the manual update once more, starting from the current W.
# Note: this cell is not idempotent; every execution moves W one step further.
lr = 0.1
gradient = 2 * ((W * x_train - y_train) * x_train).mean()
W = W - gradient * lr

In [9]:
# Inspect W after the second manual update step.
W

tensor([0.9956], grad_fn=<SubBackward0>)

## Full Code

In [10]:
# Training data: three (x, y) pairs with y equal to x, so the optimum is W = 1.
x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[1], [2], [3]])

# Single learnable weight of the bias-free model y_pred = x * W.
W = torch.randn(1, requires_grad=True)

lr = 0.1

nb_epochs = 10
for epoch in range(nb_epochs + 1):

    # The gradient is derived by hand below, so autograd does not need to
    # record any of these operations.
    with torch.no_grad():
        # y_pred
        y_pred = x_train * W

        # cost (MSE) and its analytic gradient dJ/dW = 2 * mean((W*x - y) * x)
        cost = torch.mean((y_pred - y_train)**2)
        gradient = 2 * torch.mean((W * x_train - y_train) * x_train)

    print('Epoch {:4d}/{} W: {:.3f}, Cost: {:.6f}'.format(
        epoch, nb_epochs, W.item(), cost.item()
    ))

    # update W in place. Writing `W = W - lr * gradient` with tracking enabled
    # would rebind W to a non-leaf tensor and chain each epoch's autograd
    # graph onto the previous one, even though backward() is never called.
    with torch.no_grad():
        W -= lr * gradient

Epoch    0/10 W: -1.379, Cost: 26.408022
Epoch    1/10 W: 0.841, Cost: 0.117369
Epoch    2/10 W: 0.989, Cost: 0.000522
Epoch    3/10 W: 0.999, Cost: 0.000002
Epoch    4/10 W: 1.000, Cost: 0.000000
Epoch    5/10 W: 1.000, Cost: 0.000000
Epoch    6/10 W: 1.000, Cost: 0.000000
Epoch    7/10 W: 1.000, Cost: 0.000000
Epoch    8/10 W: 1.000, Cost: 0.000000
Epoch    9/10 W: 1.000, Cost: 0.000000
Epoch   10/10 W: 1.000, Cost: 0.000000


## Gradient Descent with torch.optim

In [11]:
from torch import optim

In [12]:
# Toy dataset: targets equal inputs, so the optimum is W = 1.
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)

# W is a leaf tensor handed to the optimizer, which updates it in place.
W = torch.randn(1, requires_grad=True)
optimizer = optim.SGD([W], lr=0.1)

nb_epochs = 10
for epoch in range(nb_epochs + 1):

    # forward pass: prediction and MSE cost
    y_pred = W * x_train
    cost = (y_pred - y_train).pow(2).mean()

    print('Epoch {:4d}/{} W: {:.3f}, Cost: {:.6f}'.format(
        epoch, nb_epochs, W.item(), cost.item()
    ))

    # backward pass and update: clear the previous gradient, let autograd
    # compute d(cost)/dW, then take one SGD step on W
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

Epoch    0/10 W: -0.779, Cost: 14.769149
Epoch    1/10 W: 0.881, Cost: 0.065641
Epoch    2/10 W: 0.992, Cost: 0.000292
Epoch    3/10 W: 0.999, Cost: 0.000001
Epoch    4/10 W: 1.000, Cost: 0.000000
Epoch    5/10 W: 1.000, Cost: 0.000000
Epoch    6/10 W: 1.000, Cost: 0.000000
Epoch    7/10 W: 1.000, Cost: 0.000000
Epoch    8/10 W: 1.000, Cost: 0.000000
Epoch    9/10 W: 1.000, Cost: 0.000000
Epoch   10/10 W: 1.000, Cost: 0.000000
