# Đạo hàm tự động với ``torch.autograd``


In [1]:
import numpy as np
import torch


## 1. Vì sao cần đạo hàm?

- Mục tiêu: Dùng **Gradient Descent** để tìm min f(x)
- Công thức cập nhật của Gradient Descent: $ x^{(k+1)} = x^{(k)} - \eta \, f'(x^{(k)}) $, trong đó $ \eta $ là tốc độ học (learning rate)

## 2. Ví dụ sử dụng Gradient Descent với numpy
- Bài toán: tìm cực tiểu hàm $ f(x) = x^2 - 2x + 5 $
- Dễ thấy $ f(x) = (x - 1)^2 + 4 $, nên $ \min f(x) = 4 $ tại $ x = 1 $
- Tính đạo hàm thủ công: $ f'(x) = 2x - 2 $

In [2]:
# Gradient descent on f(x) = x^2 - 2x + 5 using the hand-derived gradient
# f'(x) = 2x - 2. Five random starting points are optimized in parallel,
# one per array element.
learning_rate = 0.1
x_0 = np.random.rand(5)
# The loop variable is `step` rather than `iter`: binding `iter` here would
# replace the builtin for every later cell in the notebook.
for step in range(100):
    grad = 2*x_0 - 2
    x_0 = x_0 - learning_rate*grad
    print(x_0)

[0.97731305 0.32254855 0.40530297 0.65032974 0.48677213]
[0.98185044 0.45803884 0.52424237 0.72026379 0.5894177 ]
[0.98548035 0.56643107 0.6193939  0.77621103 0.67153416]
[0.98838428 0.65314486 0.69551512 0.82096883 0.73722733]
[0.99070743 0.72251589 0.7564121  0.85677506 0.78978186]
[0.99256594 0.77801271 0.80512968 0.88542005 0.83182549]
[0.99405275 0.82241017 0.84410374 0.90833604 0.86546039]
[0.9952422  0.85792813 0.87528299 0.92666883 0.89236831]
[0.99619376 0.88634251 0.90022639 0.94133507 0.91389465]
[0.99695501 0.90907401 0.92018112 0.95306805 0.93111572]
[0.99756401 0.9272592  0.93614489 0.96245444 0.94489258]
[0.99805121 0.94180736 0.94891591 0.96996355 0.95591406]
[0.99844096 0.95344589 0.95913273 0.97597084 0.96473125]
[0.99875277 0.96275671 0.96730618 0.98077667 0.971785  ]
[0.99900222 0.97020537 0.97384495 0.98462134 0.977428  ]
[0.99920177 0.9761643  0.97907596 0.98769707 0.9819424 ]
[0.99936142 0.98093144 0.98326077 0.99015766 0.98555392]
[0.99948914 0.98474515 0.986608

## 3. Ví dụ sử dụng Gradient Descent với pytorch 

#### - Tính đạo hàm bằng pytorch

In [3]:
# Let autograd differentiate f(x) = x^2 - 2x + 5 at a random point, then print
# the closed-form derivative 2x - 2 next to it for comparison.
x = torch.rand(1).requires_grad_(True)
square_term = x*x
linear_term = 2*x
f = square_term - linear_term + 5
f.backward()  # populates x.grad with df/dx
autograd_value = x.grad
manual_value = 2*x - 2
print('Derivative by pytorch: ', autograd_value)
print('Manual Derivative: ', manual_value)

Derivative by pytorch:  tensor([-0.6214])
Manual Derivative:  tensor([-0.6214], grad_fn=<SubBackward0>)


#### - Gradient Descent với pytorch

In [5]:
def f(x):
    """Evaluate the quadratic objective ``x^2 - 2x + 5`` at ``x``.

    ``x`` only needs to support ``*``, ``-`` and ``+`` with integers, so plain
    numbers work as well as the tensors this notebook passes in.
    """
    squared = x*x
    doubled = 2*x
    return squared - doubled + 5

In [6]:
# Minimize f with PyTorch's SGD optimizer; autograd supplies the gradient, so
# no hand-written derivative is needed.
learning_rate = 0.1
x = torch.rand(1, requires_grad=True)
# Pass the named constant instead of repeating the literal, so the step size is
# configured in exactly one place.
optimizer = torch.optim.SGD([x], lr=learning_rate)

# The loop variable is `step` rather than `iter`, which would shadow the builtin.
for step in range(100):
    optimizer.zero_grad()  # clear the gradient left over from the previous step
    out = f(x)
    # f(x) builds a fresh graph every iteration, so the graph does not need to
    # be retained after backward().
    out.backward()
    optimizer.step()
    print(x.detach())

tensor([0.7775])
tensor([0.8220])
tensor([0.8576])
tensor([0.8861])
tensor([0.9088])
tensor([0.9271])
tensor([0.9417])
tensor([0.9533])
tensor([0.9627])
tensor([0.9701])
tensor([0.9761])
tensor([0.9809])
tensor([0.9847])
tensor([0.9878])
tensor([0.9902])
tensor([0.9922])
tensor([0.9937])
tensor([0.9950])
tensor([0.9960])
tensor([0.9968])
tensor([0.9974])
tensor([0.9979])
tensor([0.9984])
tensor([0.9987])
tensor([0.9989])
tensor([0.9992])
tensor([0.9993])
tensor([0.9995])
tensor([0.9996])
tensor([0.9997])
tensor([0.9997])
tensor([0.9998])
tensor([0.9998])
tensor([0.9999])
tensor([0.9999])
tensor([0.9999])
tensor([0.9999])
tensor([0.9999])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000])
tensor([1.0000