In [13]:
import torch, torchvision
import torch.nn as nn
import torch.nn.functional as F

In [14]:
class Net(nn.Module):
    """LeNet-style convolutional classifier for 32x32 images.

    Two (5x5 convolution -> ReLU -> 2x2 max-pool) stages are followed by
    three fully connected layers.  The first fully connected layer expects
    16 feature maps of 5x5, which is what a 32x32 input produces after the
    two conv/pool stages (32 -> 28 -> 14 -> 10 -> 5).

    Args:
        in_channels: Number of channels of the input image. Defaults to 1.
        num_classes: Number of scores returned by the last layer.
            Defaults to 10.
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()  # run the parent class (nn.Module) initialiser

        # in_channels input image channel(s), 6 output channels,
        # 5x5 square convolution kernel
        self.conv1 = nn.Conv2d(in_channels, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)

        # affine operations: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute the raw (un-normalised) class scores for a batch.

        Args:
            x: Batch of images; fc1's input size of 16*5*5 only matches
                when the spatial size is 32x32.

        Returns:
            Tensor with one row per batch element and ``num_classes``
            columns.
        """
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the window is square, a single number is enough
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Keep the batch dimension, merge everything else into one axis
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the last layer: the raw scores are returned
        x = self.fc3(x)
        return x

# Build the network with its default constructor arguments and print the
# module summary (one line per registered layer).
net = Net()
print(net)



Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


您只需要定义forward函数，就可以使用autograd为您自动定义backward函数（计算梯度）。 您可以在forward函数中使用任何张量操作。

In [15]:
# Materialise every learnable tensor registered on the network.
params = [*net.parameters()]
# Show how many tensors there are, then the shape of the first one
# (conv1's weight, per the recorded output: 6 filters x 1 channel x 5 x 5).
print(len(params), params[0].size(), sep="\n")

10
torch.Size([6, 1, 5, 5])


尝试一个32x32的随机输入。注意：该网络（LeNet）的预期输入大小为32x32。要在 MNIST 数据集上使用此网络，请将数据集中的图像调整为32x32。

In [16]:
# One random single-channel 32x32 image, as a batch of size 1.
# NOTE(review): the name `input` shadows Python's builtin; it is kept
# because a later cell reads this variable.
input = torch.rand((1, 1, 32, 32))
# Forward pass through the network.
out = net(input)
print(out)

tensor([[ 0.1024, -0.1426, -0.0364, -0.0722, -0.0210,  0.0902, -0.0801, -0.0674,
          0.0620,  0.0855]], grad_fn=<AddmmBackward>)


将所有参数的梯度缓冲区归零，然后使用随机梯度进行反向传播：

In [17]:
# Reset the gradient buffers of all parameters before backpropagating.
net.zero_grad()
# `out` is not a scalar, so backward() needs an explicit upstream gradient
# of matching shape (1, 10); a random one is used here.
out.backward(gradient=torch.randn(1, 10))

In [18]:
# Mean-squared-error criterion used to compare prediction and target.
criterion = nn.MSELoss()

# Fresh forward pass so that a new autograd graph is attached to `output`.
output = net(input)

# A dummy target, for example: 10 random values.
target = torch.randn(10)
print(target)
# Add a leading batch axis so the target has the same shape as `output`.
target = target.unsqueeze(0)
print(target)

loss = criterion(output, target)
print(loss)


tensor([-0.4768,  0.5627, -0.4631, -0.1945,  2.2527, -0.0236, -0.5360,  0.8781,
         1.0647,  0.0367])
tensor([[-0.4768,  0.5627, -0.4631, -0.1945,  2.2527, -0.0236, -0.5360,  0.8781,
          1.0647,  0.0367]])
tensor(0.8322, grad_fn=<MseLossBackward>)


In [19]:
# Walk backwards through the autograd graph, always following the first
# input of the current node.
mse_node = loss.grad_fn
print(mse_node)  # MSELoss

linear_node = mse_node.next_functions[0][0]
print(linear_node)  # Linear (the addmm of the last layer)

# The recorded output shows AccumulateGrad here, not ReLU: index [0][0]
# follows the first input of the addmm node, which is a leaf tensor
# (presumably fc3's bias; TODO confirm).
first_input_node = linear_node.next_functions[0][0]
print(first_input_node)


<MseLossBackward object at 0x7f9046525280>
<AddmmBackward object at 0x7f9046525d60>
<AccumulateGrad object at 0x7f9046525280>


反向传播
要反向传播误差，我们只需调用loss.backward()。不过，您需要先清除现有的梯度，否则新的梯度将累积到现有的梯度上。
现在，我们调用loss.backward()，并查看反向传播前后conv1的偏置梯度。

In [20]:
# Zero the gradient buffers of all parameters so the values printed below
# come only from this backward pass.
# NOTE(review): the recorded output shows zeros before backward; newer
# PyTorch releases may reset grads to None instead; confirm against the
# installed version.
net.zero_grad()

print('conv1.bias.grad before backward', net.conv1.bias.grad, sep='\n')

# Backpropagate the loss; gradients are accumulated into each parameter's .grad.
loss.backward()

print('conv1.bias.grad after backward', net.conv1.bias.grad, sep='\n')


conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([ 0.0000, -0.0135,  0.0055,  0.0026,  0.0024,  0.0116])
