In [1]:
import torch
import numpy as np

# torch 求导

参考 [PyTorch 官方教程：Automatic Differentiation with torch.autograd](https://pytorch.org/tutorials/beginner/basics/autogradqs_tutorial.html)

PyTorch 实现模型训练需要完整地写下训练过程，包括反向传播求梯度以及应用梯度下降算法。（06 见 chapter_2/03_...）

## 近似求导

In [2]:
def f(x):
    """Evaluate the quadratic 3x^2 + 2x - 1 at ``x``."""
    quadratic_term = 3. * x ** 2
    linear_term = 2. * x
    return quadratic_term + linear_term - 1
# Approximate differentiation: sample f at x - eps and x + eps and take the
# slope of the line through those two nearby points.
def approximate_derivative(f, x, eps=1e-4):
    """Estimate the derivative of ``f`` at ``x`` with a central difference.

    Args:
        f: Callable taking a single argument.
        x: Point at which the derivative is estimated.
        eps: Distance moved to either side of ``x``.

    Returns:
        ``(f(x + eps) - f(x - eps)) / (2 * eps)``.
    """
    rise = f(x + eps) - f(x - eps)
    run = 2. * eps
    return rise / run

# Analytically f'(x) = 6x + 2, so the estimate at x = 1 should be close to 8.
print(approximate_derivative(f, x=1.))

7.999999999994678


In [3]:
# Partial derivatives: hold one variable fixed and differentiate with respect
# to the other.
def g(x1, x2):
    """Return ``(x1 + 5) * x2 ** 2``."""
    shifted = x1 + 5
    squared = x2 ** 2
    return shifted * squared

def approximate_gradient(g, x1, x2, eps=1e-3):
    """Estimate both partial derivatives of ``g`` at ``(x1, x2)``.

    Each partial derivative is obtained by freezing the other argument and
    handing the resulting one-argument function to ``approximate_derivative``.

    Args:
        g: Callable taking two arguments.
        x1: First coordinate of the evaluation point.
        x2: Second coordinate of the evaluation point.
        eps: Step forwarded to ``approximate_derivative``.

    Returns:
        Tuple ``(dg_x1, dg_x2)``.
    """
    def along_x1(x):
        # x2 stays fixed at the evaluation point.
        return g(x, x2)

    def along_x2(x):
        # x1 stays fixed at the evaluation point.
        return g(x1, x)

    return (
        approximate_derivative(along_x1, x1, eps),
        approximate_derivative(along_x2, x2, eps),
    )

# Analytic gradient of g is (x2 ** 2, 2 * (x1 + 5) * x2), i.e. (9, 42) at (2, 3).
print(approximate_gradient(g, x1=2., x2=3.))
    

(8.999999999993236, 41.999999999994486)


## torch 自动求导

In [4]:
# Create two tensors x1 and x2 with gradient tracking enabled, then use
# torch.autograd.grad to take the partial derivatives of y = g(x1, x2) one
# input at a time.

x1 = torch.tensor([2.0], requires_grad=True)
x2 = torch.tensor([3.0], requires_grad=True)
y = g(x1, x2)

# torch.autograd.grad returns a tuple with one entry per input.
dy_dx1 = torch.autograd.grad(y, x1)[0]
print(dy_dx1)

# The first call was made without retain_graph=True, so a second pass over the
# same graph is expected to fail; the error message is printed on purpose to
# show why.
try:
    dy_dx2 = torch.autograd.grad(y, x2)[0]
    print(dy_dx2)
except Exception as err:
    print(err)

tensor([9.])
Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.


In [5]:
# Differentiate with respect to both inputs in a single call.

x1 = torch.tensor([2.0], requires_grad=True)
x2 = torch.tensor([3.0], requires_grad=True)
y = g(x1, x2)

# Passing both inputs at once yields one gradient per input, in the same
# order as the inputs list.
dy_dx1, dy_dx2 = torch.autograd.grad(outputs=y, inputs=[x1, x2])

print(dy_dx1, dy_dx2)

tensor([9.]) tensor([42.])


In [7]:
# In practice we usually just call backward() on the output.

x1 = torch.tensor([2.0], requires_grad=True)
x2 = torch.tensor([3.0], requires_grad=True)
y = g(x1, x2)

# backward() stores each partial derivative on the matching tensor's .grad.
y.backward()
print(x1.grad, x2.grad)

tensor([9.]) tensor([42.])


## 二阶导


In [17]:
x1 = torch.tensor([2.], requires_grad=True)
x2 = torch.tensor([3.], requires_grad=True)
y = g(x1, x2)

# Second-order partial derivatives of y with respect to x1 and x2.
# create_graph=True records the first-order gradients in the autograd graph so
# that they can themselves be differentiated.
dy_dx1, dy_dx2 = torch.autograd.grad(y, [x1, x2], create_graph=True)

# Each call returns one entry per input, in the order [x1, x2]:
#   grad(dy_dx1, [x1, x2]) -> (d2y/dx1dx1, d2y/dx1dx2)
#   grad(dy_dx2, [x1, x2]) -> (d2y/dx2dx1, d2y/dx2dx2)
# For g(x1, x2) = (x1 + 5) * x2 ** 2, dy_dx1 = x2 ** 2 does not depend on x1,
# so with allow_unused=True dy_dx1_dx1 comes back as None (mathematically 0).
# The tensor returned by the first call is the mixed partial d2y/dx1dx2.
dy_dx1_dx1, dy_dx1_dx2 = torch.autograd.grad(dy_dx1, [x1, x2], allow_unused=True)
dy_dx2_dx1, dy_dx2_dx2 = torch.autograd.grad(dy_dx2, [x1, x2], allow_unused=True)
print(dy_dx1_dx2, dy_dx2_dx1, dy_dx2_dx2)

tensor([6.]) tensor([6.]) tensor([14.])


In [18]:
# Hand-written gradient descent on f, without an optimizer object.
learning_rate = 0.1
x = torch.tensor(-1.0, requires_grad=True)
for _ in range(100):
    z = f(x)
    z.backward()
    # Apply the update in place without recording it in the autograd graph.
    # torch.no_grad() is used instead of mutating through x.data, which
    # bypasses autograd's tracking of in-place changes.
    with torch.no_grad():
        x.sub_(learning_rate * x.grad)
    # backward() accumulates into x.grad, so reset it before the next step.
    x.grad.zero_()
print(x)

tensor(-0.3333, requires_grad=True)


In [19]:
# Minimise f using autograd for the gradient and a torch.optim optimizer for
# the update step.
learning_rate = 0.1
x = torch.tensor(2.0, requires_grad=True)
optimizer = torch.optim.SGD(params=[x], lr=learning_rate)
for _ in range(100):
    z = f(x)
    z.backward()           # populate x.grad
    optimizer.step()       # update x from x.grad
    optimizer.zero_grad()  # clear the gradient before the next iteration
print(x)


tensor(-0.3333, requires_grad=True)
