In [3]:
import torch
if torch.cuda.is_available():
    device=torch.device("cuda")
    a=torch.ones(3,device=device)
    b=torch.ones(3)
    b=b.to("cuda")
    c=a+b
    print(c)
    print(c.to("cpu"))

tensor([2., 2., 2.], device='cuda:0')
tensor([2., 2., 2.])


In [None]:
#自动微分
#Tensor:requires_grad设置为True
#完成函数运算操作后用.backward()来自动计算梯度
#梯度保存在Tensor的.grad属性中

In [4]:
x=torch.ones(2,2,requires_grad=True)
x

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [5]:
y=x+2
y

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)

In [6]:
z=torch.mean(y)
z

tensor(3., grad_fn=<MeanBackward0>)

In [7]:
z.backward()

In [9]:
#z=1/4((x1+2)+(x2+2)+(x3+2)+(x4+2))
#dz/dxi=1/4=0.25
x.grad

tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])

In [20]:
x=torch.randn(4,requires_grad=True)
y=x*2
while y.data.norm()<10:
    y=y*2
y

tensor([-3.9233, 12.6289,  9.0813, -9.2799], grad_fn=<MulBackward0>)

In [21]:
y.backward(torch.Tensor([0.1,1,1,1]))

In [22]:
x.grad

tensor([0.8000, 8.0000, 8.0000, 8.0000])

In [24]:
#不想涉及反向传播可以包装在torch.no_grad里
with torch.no_grad():
    print((x**2).requires_grad)

False


In [27]:
x=torch.ones(3,3)
x

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [33]:
w=torch.randn(3,1,requires_grad=True)
out=x.mm(w)

In [36]:
loss=torch.sum(out)
loss

tensor(1.8403, grad_fn=<SumBackward0>)

In [37]:
loss.backward()

In [38]:
w.grad

tensor([[3.],
        [3.],
        [3.]])

In [43]:
import sys
sys.executable

'D:\\Anaconda\\python.exe'

In [88]:
import torch.nn as nn
import torch.nn.functional as F

In [121]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1=nn.Conv2d(1,6,3) #输入图像通道=1,kernel个数=6，3*3的卷积核
        self.conv2=nn.Conv2d(6,16,3) #通道=6,kerne个数=16，3*3的卷积核
        #y=wx+b
        self.fc1=nn.Linear(16*6*6,120) #全连接6*6的feature map,通道数16
        self.fc2=nn.Linear(120,84)    #全连接，同上
        self.fc3=nn.Linear(84,10)     #全连接，同上
    def forward(self,x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def num_flat_features(self,x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [122]:
#网络的参数打印
params=list(net.parameters())
print(len(params))
print(params[0].size())   #就是第一波卷积核的参数个数
print(params[1].size())   #偏执的，16个通道数即16个偏置项
print(params[2].size())
print(params[4].size())
print(params[5].size())
print(params[6].size())
print(params[7].size())
print(params[8].size())
print(params[9].size())

10
torch.Size([6, 1, 3, 3])
torch.Size([6])
torch.Size([16, 6, 3, 3])
torch.Size([120, 576])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


In [123]:
input=torch.randn(1,1,32,32)
out=net(input)
print(out)

tensor([[ 0.0740, -0.0853, -0.1490, -0.0170,  0.0304,  0.0661,  0.0754,  0.0921,
         -0.0636,  0.0843]], grad_fn=<AddmmBackward>)


In [124]:
#将所有参数的梯度缓存置0
net.zero_grad()
out.backward(torch.randn(1,10))
params[0].grad

tensor([[[[ 0.0291, -0.0144,  0.0867],
          [ 0.0780, -0.1186, -0.0020],
          [-0.0094,  0.0685,  0.0679]]],


        [[[-0.0161, -0.0747, -0.0158],
          [-0.0093,  0.0932,  0.1229],
          [ 0.0412,  0.0201,  0.0515]]],


        [[[ 0.0802, -0.0094, -0.0489],
          [-0.0152, -0.0158,  0.0107],
          [ 0.0022, -0.1026, -0.1327]]],


        [[[-0.1390,  0.0044,  0.0989],
          [-0.1072, -0.0060, -0.0306],
          [-0.1198,  0.0051, -0.0014]]],


        [[[-0.2469,  0.1005,  0.1425],
          [ 0.0696, -0.0957, -0.1556],
          [-0.1197,  0.1738,  0.0825]]],


        [[[ 0.0095, -0.0387,  0.0077],
          [-0.0637, -0.0836, -0.0277],
          [ 0.0675,  0.0055, -0.0556]]]])

In [125]:
net.zero_grad()
params[0].grad

tensor([[[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]],


        [[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]],


        [[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]],


        [[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]],


        [[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]],


        [[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]]])

In [129]:
out=net(input)
target=torch.randn(10) 
target=target.view(1,-1) #1*10
criterion=nn.MSELoss()
loss=criterion(out,target)
loss
#计算流图
#input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
#       -> view -> linear -> relu -> linear -> relu -> linear
#       -> MSELoss
#       -> loss

tensor(0.4304, grad_fn=<MseLossBackward>)

In [132]:
loss.grad_fn #MSELoss

<MseLossBackward at 0x14701f9f748>

In [133]:
loss.grad_fn.next_functions[0][0] #Linear

<AddmmBackward at 0x14701f78f28>

In [134]:
loss.grad_fn.next_functions[0][0].next_functions[0][0] #Relu

<AccumulateGrad at 0x14701f78550>

In [137]:
net.zero_grad()

In [138]:
print('conv1的偏执在BP前的梯度')
print(net.conv1.bias.grad)

conv1的在BP前的偏执
tensor([0., 0., 0., 0., 0., 0.])


In [139]:
loss.backward()

In [141]:
print('conv1的偏执在BP后的梯度')
print(net.conv1.bias.grad)

conv1的偏执在BP后的梯度
tensor([-0.0078, -0.0051,  0.0073, -0.0070,  0.0065,  0.0050])


In [143]:
learning_rate=0.01
for i in net.parameters():
    i.data.sub_(i.grad.data*learning_rate)

In [144]:
import torch.optim as optim

In [147]:
optimizer=optim.SGD(net.parameters(),lr=0.01)

#train loop
optimizer.zero_grad()
output=net(input)
loss=criterion(output,target)
loss.backward()
optimizer.step()