## 16. PyTorch 神经网络基础

### 参数管理
#### 首先关注单隐藏层的MLP：参数访问

In [None]:
import torch
from torch import nn

# Single-hidden-layer MLP: 4 input features -> 8 hidden units (ReLU) -> 1 output.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)
# Random mini-batch of 2 samples with 4 features each.
X = torch.rand(2, 4)
# Forward pass; as the last expression of the cell its value is displayed.
net(X)

tensor([[ 0.0038],
        [-0.0682]], grad_fn=<AddmmBackward0>)

参数访问

In [None]:
# Inspect the parameters of the third module in the Sequential (the output layer).
out_layer = net[2]
print(out_layer.state_dict())       # weight and bias tensors of that layer
print(type(out_layer.bias))         # the bias is stored as a Parameter object
print(out_layer.bias)               # the Parameter itself
print(out_layer.bias.data)          # .data holds the parameter's value
print(out_layer.weight.grad)        # .grad holds the parameter's gradient

OrderedDict([('weight', tensor([[-0.3173,  0.0221, -0.3406, -0.1552,  0.1557, -0.2558,  0.0014,  0.2008]])), ('bias', tensor([0.0560]))])
<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([0.0560], requires_grad=True)
tensor([0.0560])
None


一次性访问所有参数

In [None]:
# Name and shape of every parameter in the first layer.
first_layer_params = net[0].named_parameters()
print(*((name, tensor.shape) for name, tensor in first_layer_params))

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))


In [None]:
# Name and shape of every parameter in the whole network; names are prefixed
# with the index of the layer that owns them.
all_named_params = net.named_parameters()
print(*((name, tensor.shape) for name, tensor in all_named_params))

('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [None]:
# state_dict() maps parameter names to tensors; look up the bias of layer 2 by name.
param_table = net.state_dict()
param_table['2.bias'].data

tensor([0.0560])

#### 从嵌套块收集参数

In [None]:
def block1():
    """Build a 4 -> 8 -> 4 MLP block with a ReLU after each linear layer."""
    layers = (
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
        nn.ReLU(),
    )
    return nn.Sequential(*layers)
def block2():
    """Stack four separately constructed ``block1`` modules in one Sequential.

    The sub-blocks are registered under the names 'block 0' through 'block 3'.
    """
    stacked = nn.Sequential()
    for i in range(4):
        sub_block = block1()
        stacked.add_module(f'block {i}', sub_block)
    return stacked

# Nest the stacked blocks inside another Sequential, followed by a 4 -> 1 output layer.
rgnet = nn.Sequential(
    block2(),
    nn.Linear(4, 1),
)
rgnet(X)

tensor([[0.1084],
        [0.1084]], grad_fn=<AddmmBackward0>)

In [None]:
# Printing the module lists its nested sub-modules together with their names.
print(rgnet)

Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


#### 参数初始化

内置初始化

In [None]:
def init_normal(m):
    """Initialize a linear layer with N(0, 0.01^2) weights and a zero bias.

    Written to be passed to ``Module.apply``. Modules that are not
    ``nn.Linear`` (or a subclass of it) are left untouched.

    Args:
        m: The module to initialize.
    """
    if not isinstance(m, nn.Linear):
        return
    # The trailing underscore marks an initializer that modifies its input in place.
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # A layer created with bias=False has bias set to None; nothing to reset then.
    if m.bias is not None:
        nn.init.zeros_(m.bias)

# apply() runs the function on every module of net, descending into nested ones.
net.apply(init_normal)
first_layer = net[0]
first_layer.weight.data, first_layer.bias.data

(tensor([[-0.0037, -0.0237, -0.0028, -0.0029],
         [-0.0121, -0.0031,  0.0058,  0.0124],
         [ 0.0020, -0.0152, -0.0196, -0.0012],
         [-0.0181, -0.0065,  0.0016,  0.0048],
         [-0.0054,  0.0028, -0.0166,  0.0041],
         [ 0.0043, -0.0027, -0.0166, -0.0087],
         [-0.0095, -0.0255, -0.0120,  0.0110],
         [ 0.0153, -0.0049,  0.0063,  0.0075]]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0.]))

In [None]:
def init_constant(m):
    """Initialize a linear layer with all weights set to 1 and a zero bias.

    Written to be passed to ``Module.apply``. Modules that are not
    ``nn.Linear`` (or a subclass of it) are left untouched.

    Args:
        m: The module to initialize.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.constant_(m.weight, 1)
    # A layer created with bias=False has bias set to None; nothing to reset then.
    if m.bias is not None:
        nn.init.zeros_(m.bias)

# Re-initialize every linear layer of net with the constant scheme, then show layer 0.
net.apply(init_constant)
first_layer = net[0]
first_layer.weight.data, first_layer.bias.data

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0.]))

对某些块应用不同的初始化方法：

In [None]:
def xavier(m):
    """Apply Xavier (Glorot) uniform initialization to a linear layer's weight.

    The bias is not modified. Modules that are not ``nn.Linear`` (or a
    subclass of it) are left untouched.

    Args:
        m: The module to initialize.
    """
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)

def init_42(m):
    """Set every weight of a linear layer to the constant 42.

    The bias is not modified. Modules that are not ``nn.Linear`` (or a
    subclass of it) are left untouched.

    Args:
        m: The module to initialize.
    """
    if isinstance(m, nn.Linear):
        nn.init.constant_(m.weight, 42)

# Use a different initializer per layer: Xavier for layer 0, constant 42 for layer 2.
net[0].apply(xavier)
net[2].apply(init_42)
for layer in (net[0], net[2]):
    print(layer.weight.data)

tensor([[-0.2610,  0.1214,  0.4056, -0.3780],
        [-0.3705,  0.3950,  0.1350,  0.2195],
        [-0.2261, -0.6458, -0.4833, -0.6504],
        [ 0.6422,  0.4503,  0.5086,  0.0754],
        [ 0.3258,  0.1890,  0.6719, -0.3198],
        [-0.3559,  0.0719,  0.6579,  0.3461],
        [-0.3692, -0.3734,  0.3412,  0.2992],
        [ 0.6932, -0.1207, -0.6441,  0.0870]])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])


自定义初始化

In [None]:
def my_init(m):
    """Custom initializer: uniform weights in [-10, 10] with small values zeroed.

    For a linear layer, prints "Init" followed by the (name, shape) pairs of
    its parameters, draws the weights uniformly from [-10, 10], and then sets
    every weight whose absolute value is below 5 to zero. The bias is not
    modified. Modules that are not ``nn.Linear`` (or a subclass of it) are
    left untouched.

    Args:
        m: The module to initialize.
    """
    if not isinstance(m, nn.Linear):
        return
    print(
        "Init",
        *[(name, param.shape) for name, param in m.named_parameters()]
    )
    nn.init.uniform_(m.weight, -10, 10)
    # Boolean mask of the entries to keep; multiplying by it zeroes everything in (-5, 5).
    keep = m.weight.data.abs() >= 5
    m.weight.data *= keep

# Run the custom initializer over the whole network, then show the first layer's weights.
net.apply(my_init)
first_layer = net[0]
first_layer.weight.data

Init ('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
Init ('weight', torch.Size([1, 8])) ('bias', torch.Size([1]))


tensor([[ 0.0000,  5.0572, -7.9576, -5.5237],
        [ 5.2125,  0.0000, -0.0000, -0.0000],
        [ 0.0000, -7.1348, -0.0000,  8.1378],
        [-0.0000, -0.0000, -5.1147, -8.3742],
        [-8.2564,  0.0000,  5.5102,  8.2255],
        [ 0.0000,  0.0000,  6.7932, -0.0000],
        [-0.0000,  0.0000,  5.5882,  5.3236],
        [ 6.0393,  5.5033, -0.0000,  0.0000]])

In [None]:
# Parameters can also be modified directly through .data, without using nn.init.
first_weight = net[0].weight
first_weight.data += 1
first_weight.data

tensor([[ 1.0000,  6.0572, -6.9576, -4.5237],
        [ 6.2125,  1.0000,  1.0000,  1.0000],
        [ 1.0000, -6.1348,  1.0000,  9.1378],
        [ 1.0000,  1.0000, -4.1147, -7.3742],
        [-7.2564,  1.0000,  6.5102,  9.2255],
        [ 1.0000,  1.0000,  7.7932,  1.0000],
        [ 1.0000,  1.0000,  6.5882,  6.3236],
        [ 7.0393,  6.5033,  1.0000,  1.0000]])

#### 参数绑定：采用同一实例

In [None]:
# Parameter tying: insert the SAME layer instance twice so both positions
# use one set of parameters.
shared = nn.Linear(8, 8)
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), shared, nn.ReLU(), shared, nn.ReLU(), nn.Linear(8, 1))
net(X)
# Positions 2 and 4 hold the same module, so their weights compare equal.
print((net[2].weight.data == net[4].weight.data).all())
# Assign (not compare) a new value through position 2 ...
net[2].weight.data[0, 0] = 100
# ... and check that the change is visible through position 4 as well.
print(net[2].weight.data[0, 0] == net[4].weight.data[0, 0])

tensor(True)
tensor(True)
