In [6]:
# Automatic differentiation with torch.autograd
import torch

# Seed the global RNG so the randomly initialised parameters below (and every
# value derived from them) are the same on each Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Single-layer model: z = x @ w + b, compared against y with binary
# cross-entropy computed on the logits.
x = torch.ones(5)   # input tensor
y = torch.zeros(3)  # expected output
# requires_grad=True makes autograd record the operations applied to the
# parameters so that their gradients can be computed later.
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
z = torch.matmul(x, w) + b  # logits fed to the loss
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)

In [7]:
# A tensor produced by a tracked operation keeps a reference to the function
# used for its backward pass in `grad_fn`; show it for both z and loss.
print(
    f"Gradient function for z = {z.grad_fn}",
    f"Gradient function for loss = {loss.grad_fn}",
    sep="\n",
)

Gradient function for z = <AddBackward0 object at 0x000002666CEBABF0>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x00000266245A60E0>


In [8]:
# Compute gradients: to optimise the network's parameter weights we need the
# derivatives of the loss function with respect to those parameters.
loss.backward()

# After backward(), the gradients are available in the .grad attribute of
# w and b.
print(w.grad, b.grad, sep="\n")

tensor([[0.0026, 0.3324, 0.3192],
        [0.0026, 0.3324, 0.3192],
        [0.0026, 0.3324, 0.3192],
        [0.0026, 0.3324, 0.3192],
        [0.0026, 0.3324, 0.3192]])
tensor([0.0026, 0.3324, 0.3192])


In [9]:
# Disable gradient tracking.
# Computed from parameters that require gradients, so z is tracked too.
z = x @ w + b
print(z.requires_grad)

# Inside torch.no_grad() the same computation is not recorded by autograd.
with torch.no_grad():
    z = x @ w + b
print(z.requires_grad)

True
False


In [10]:
# Alternative way to stop tracking: detach() returns a new tensor that is
# detached from the computation graph.
z = x @ w + b
z_det = z.detach()
print(z_det.requires_grad)

False


In [11]:
# Tensor gradients and Jacobian products.
# Usually we have a scalar loss function and need its gradient with respect to
# some parameters. When the output is an arbitrary tensor instead, PyTorch
# computes a so-called Jacobian product rather than the actual gradient; the
# tensor passed to backward() plays the role of the vector in that product.
inp = torch.eye(4, 5, requires_grad=True)
out = ((inp + 1) ** 2).t()

# First backward pass: inp.grad is populated.
out.backward(gradient=torch.ones_like(out), retain_graph=True)
print(f"First call\n{inp.grad}")

# Second backward pass: gradients are accumulated into inp.grad, so the
# printed values double.
out.backward(gradient=torch.ones_like(out), retain_graph=True)
print(f"\nSecond call\n{inp.grad}")

# Reset the accumulated gradient in place before the next backward pass.
inp.grad.zero_()
out.backward(gradient=torch.ones_like(out), retain_graph=True)
print(f"\nCall after zeroing gradients\n{inp.grad}")

First call
tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.]])

Second call
tensor([[8., 4., 4., 4., 4.],
        [4., 8., 4., 4., 4.],
        [4., 4., 8., 4., 4.],
        [4., 4., 4., 8., 4.]])

Call after zeroing gradients
tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.]])
