### 4.2 Autograd

In [2]:
import torch

In [3]:
# Three small float tensors; a freshly built tensor does not track gradients.
x = torch.tensor([1.0, 2.0])
y = torch.tensor([4.0, 5.0])
z = torch.tensor([7.0, 3.0])

In [4]:
x

tensor([1., 2.])

In [5]:
y

tensor([4., 5.])

In [6]:
z

tensor([7., 3.])

In [7]:
x.requires_grad

False

In [8]:
(x + y).requires_grad

False

In [9]:
z.requires_grad = True

In [11]:
(x + z).requires_grad   # if any operand has requires_grad=True, the result of the operation has requires_grad=True as well

True

In [12]:
x = torch.tensor([1., 10.])

In [13]:
x

tensor([ 1., 10.])

In [14]:
x.requires_grad = True

In [15]:
x = torch.tensor([1, 10])

In [16]:
# Deliberate failure: `x` was just rebuilt from integer literals, and only
# floating-point tensors can require gradients. Catch the error and show its
# message so the demonstration survives Restart & Run All instead of halting
# the notebook at this cell.
try:
    x.requires_grad = True
except RuntimeError as err:
    print('RuntimeError:', err)

RuntimeError: only Tensors of floating point dtype can require gradients

以上说明：Only floating point type tensors can have their gradient computed.

In [17]:
t = torch.tensor([1., 2., 4.]).requires_grad_() 

In [18]:
t.requires_grad

True

以上说明：`requires_grad_(requires_grad=True)` 方法会原地（in-place）把张量的 `requires_grad` 属性设置为传入的值，该参数默认为 True。

In [19]:
u = torch.tensor([10., 20.]).requires_grad_()

In [20]:
t

tensor([1., 2., 4.], requires_grad=True)

In [21]:
u

tensor([10., 20.], requires_grad=True)

In [22]:
a = t.pow(2).sum() + u.log().sum()

In [23]:
a

tensor(26.2983, grad_fn=<AddBackward0>)

In [24]:
torch.autograd.grad(a,(t,u))

(tensor([2., 4., 8.]), tensor([0.1000, 0.0500]))

In [27]:
u.log() # natural logarithm (base e)

tensor([2.3026, 2.9957], grad_fn=<LogBackward>)

In [28]:
x = torch.tensor([ -3., 2., 5. ]).requires_grad_()

In [29]:
x

tensor([-3.,  2.,  5.], requires_grad=True)

In [30]:
u = x.pow(3).sum()

In [31]:
x.grad

In [32]:
u.backward()

In [33]:
x.grad

tensor([27., 12., 75.])

In [35]:
# Draw the random weights and the input from a dedicated, seeded generator so
# that Restart & Run All reproduces the same numbers without touching the
# global RNG state used by other cells.
gen = torch.Generator().manual_seed(0)
w1 = torch.rand(5, 5, generator=gen).requires_grad_()  # uniform on [0, 1), tracked by autograd
w2 = torch.rand(5, 5, generator=gen).requires_grad_()
x = torch.empty(5).normal_(generator=gen)  # standard-normal input; no gradient is requested for it

In [36]:
w1

tensor([[0.3095, 0.9126, 0.6084, 0.3153, 0.2136],
        [0.6034, 0.7012, 0.9473, 0.1753, 0.6081],
        [0.6194, 0.1951, 0.0654, 0.8663, 0.5489],
        [0.7108, 0.9270, 0.3417, 0.9668, 0.4744],
        [0.3520, 0.7316, 0.8588, 0.5702, 0.8709]], requires_grad=True)

In [37]:
w2

tensor([[0.5442, 0.4907, 0.8173, 0.3936, 0.0683],
        [0.0711, 0.0010, 0.7382, 0.5128, 0.2019],
        [0.0136, 0.9295, 0.4401, 0.1794, 0.0640],
        [0.6900, 0.0870, 0.8035, 0.7263, 0.1989],
        [0.2085, 0.0250, 0.0379, 0.8382, 0.2666]], requires_grad=True)

In [38]:
x

tensor([ 0.1977,  0.3297,  1.3046, -1.0943,  0.6292])

In [45]:
# Forward pass: a small computation graph in which w1 is used twice and w2
# once, reduced to a scalar by norm() so backward() needs no gradient argument.
x0 = x
x1 = w1 @ x0
x2 = x0 + w2 @ x1
x3 = w1 @ (x1 + x2)

q = x3.norm()

# backward() accumulates into .grad rather than overwriting it, so clear any
# gradient left over from an earlier execution of this cell; otherwise
# re-running the cell silently sums the gradients of every run.
w1.grad = None
w2.grad = None

q.backward()


In [46]:
# Show the gradients of q with respect to both weight matrices, one per line.
print(w1.grad, w2.grad, sep='\n')

tensor([[  5.9515,   7.3059,  19.2665, -12.9478,  11.4498],
        [  8.0277,   9.6192,  24.1889, -15.6670,  14.7705],
        [  6.5915,   9.1276,  29.2564, -22.2533,  15.6460],
        [  7.9666,  10.1549,  28.6588, -20.1990,  16.4008],
        [  6.9705,   7.8997,  17.5434, -10.1459,  11.5298]])
tensor([[ 4.2333,  7.9600, -1.4806,  0.5934,  6.0701],
        [ 6.0019, 11.2857, -2.0993,  0.8413,  8.6061],
        [ 5.2017,  9.7809, -1.8194,  0.7291,  7.4586],
        [ 4.4928,  8.4480, -1.5714,  0.6297,  6.4422],
        [ 4.6843,  8.8081, -1.6384,  0.6566,  6.7168]])


In [48]:
x = torch.tensor([ 1.0, -2.0, 3.0, -4.0 ]).requires_grad_()

In [49]:
a = x.abs() # element-wise absolute value

In [50]:
a

tensor([1., 2., 3., 4.], grad_fn=<AbsBackward>)

In [51]:
s = a.sum()

In [52]:
s

tensor(10., grad_fn=<SumBackward0>)

In [53]:
s.grad_fn.next_functions

((<AbsBackward at 0x10bdbf668>, 0),)

In [54]:
s.grad_fn.next_functions[0][0].next_functions

((<AccumulateGrad at 0x10bdbf7b8>, 0),)

In [55]:
# Leaf tensor created with gradient tracking switched on from the start.
x = torch.tensor([1.0, 2.0, 2.0], requires_grad=True)
# 2-norm of x: sqrt(1 + 4 + 4) = 3.
q = x.norm()

In [56]:
x

tensor([1., 2., 2.], requires_grad=True)

In [57]:
q

tensor(3., grad_fn=<NormBackward0>)

In [69]:
# Plain gradient descent on (a - 1)^2 + (b + 1)^2 + (a - b)^2,
# starting from (a, b) = (0.5, -0.5).
a = torch.tensor(0.5, requires_grad=True)
b = torch.tensor(-0.5, requires_grad=True)

eta = 0.1  # step size
for step in range(100):
    loss = (a - 1).pow(2) + (b + 1).pow(2) + (a - b).pow(2)
    grad_a, grad_b = torch.autograd.grad(loss, (a, b))
    # Update the leaf tensors in place without recording the step in the graph.
    with torch.no_grad():
        a.sub_(eta * grad_a)
        b.sub_(eta * grad_b)

print('%.06f' % a.item(), '%.06f' % b.item())

0.333333 -0.333333


In [68]:
# Same descent as the un-detached variant, except that the coupling term uses
# a.detach(): that term then contributes to the gradient of b only, so a is
# driven by (a - 1)^2 alone.
a = torch.tensor(0.5, requires_grad=True)
b = torch.tensor(-0.5, requires_grad=True)

eta = 0.1  # step size
for step in range(100):
    coupling = (a.detach() - b).pow(2)  # no gradient flows back to a through this term
    loss = (a - 1).pow(2) + (b + 1).pow(2) + coupling
    grad_a, grad_b = torch.autograd.grad(loss, (a, b))
    # In-place parameter update, excluded from the autograd graph.
    with torch.no_grad():
        a.sub_(eta * grad_a)
        b.sub_(eta * grad_b)

print('%.06f' % a.item(), '%.06f' % b.item())

1.000000 -0.000000
