# 参数管理
- 访问参数，用于调试、诊断和可视化
- 参数初始化
- 在不同模型组件间共享参数

In [2]:
import torch
from torch import nn 

# Assemble a two-layer MLP one child at a time; the names '0', '1', '2'
# are the same ones Sequential gives to positional arguments.
net = nn.Sequential()
net.add_module('0', nn.Linear(4, 8))
net.add_module('1', nn.ReLU())
net.add_module('2', nn.Linear(8, 1))
# A batch of 2 random samples with 4 features each.
X = torch.rand(2, 4)

In [3]:
# Run the network on the sample batch X; the result is shown as the cell output.
net(X)

tensor([[0.1980],
        [0.1962]], grad_fn=<AddmmBackward0>)

In [4]:
# A Sequential can be indexed like a list; index 2 is its last child here.
print(net[2])

Linear(in_features=8, out_features=1, bias=True)


In [5]:
# state_dict() maps each parameter name of the layer to its tensor.
print(net[2].state_dict())

OrderedDict([('weight', tensor([[ 0.2378, -0.0462,  0.0547,  0.0037, -0.1473,  0.1656,  0.3481,  0.1578]])), ('bias', tensor([-0.0252]))])


In [6]:
# Show, one per line: the class of the bias, the Parameter itself
# (with requires_grad), and the underlying tensor via .data.
print(type(net[2].bias), net[2].bias, net[2].bias.data, sep="\n")

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.0252], requires_grad=True)
tensor([-0.0252])


In [7]:
# No backward pass has been run in the cells shown, so .grad is still unset.
# Compare with `is`: None is a singleton, and `==` on a tensor would be
# evaluated element-wise once a gradient exists.
net[2].weight.grad is None

True

In [11]:
# List (name, shape) for every parameter of the whole network; names are
# prefixed with the index of the child layer that owns them.
print(*((key, tensor.shape) for key, tensor in net.named_parameters()))
# The same listing for the first layer alone; names carry no prefix.
print(*((key, tensor.shape) for key, tensor in net[0].named_parameters()))

('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))
('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))


In [12]:
# A parameter can also be fetched by its qualified name from the network's state_dict().
net.state_dict()["0.weight"].data

tensor([[-0.4577, -0.0718, -0.2812,  0.0226],
        [ 0.0736, -0.1761, -0.4935,  0.1240],
        [ 0.0874, -0.1780, -0.2035,  0.0379],
        [-0.3729, -0.0220, -0.1970,  0.3437],
        [-0.0219,  0.3221,  0.3463,  0.0447],
        [-0.2661,  0.3351,  0.0163, -0.4106],
        [ 0.1811, -0.2544, -0.1762,  0.4809],
        [-0.3170, -0.2076,  0.4302, -0.0652]])

In [14]:
# The comprehension used above yields an ordinary Python list of (name, shape) tuples.
type([(key, tensor.shape) for key, tensor in net.named_parameters()])

list

In [15]:
list = [1, 2, 3]
print(list)

[1, 2, 3]


In [16]:
print(*list)

1 2 3


In [17]:
# Collect parameters from nested blocks
def block1():
    """Build a small MLP block: Linear(4 -> 8), ReLU, Linear(8 -> 4), ReLU."""
    layers = (
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
        nn.ReLU(),
    )
    return nn.Sequential(*layers)

def block2():
    """Stack four independent block1() instances in a Sequential.

    The children are registered under the names 'block 0' .. 'block 3'.
    """
    stacked = nn.Sequential()
    for i in range(4):
        child_name = f'block {i}'
        # Each call to block1() creates a fresh block with its own parameters.
        stacked.add_module(child_name, block1())
    return stacked

In [18]:
# Nest the stacked blocks inside an outer Sequential, followed by a final
# 4 -> 1 linear layer, then run the result on the sample batch X.
rgnet = nn.Sequential(block2(), nn.Linear(4, 1))
rgnet(X)

tensor([[0.1291],
        [0.1291]], grad_fn=<AddmmBackward0>)

In [19]:
# Printing a module shows its nested structure, with each child's name in parentheses.
print(rgnet)

Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [20]:
# Nested Sequentials index like nested lists: outer child 0 -> its child at
# position 1 ('block 1') -> that block's first Linear layer -> its bias tensor.
rgnet[0][1][0].bias.data

tensor([-0.2247,  0.2929,  0.2432, -0.3540,  0.3782, -0.1058, -0.0248, -0.4874])

In [21]:
# Parameter initialization can use the helpers in nn.init
def init_normal(m):
    """Initialize a linear layer with N(0, 0.01^2) weights and a zero bias.

    Intended to be passed to ``Module.apply``; any module that is not an
    ``nn.Linear`` is left untouched.

    Args:
        m: The module to initialize. It is modified in place.
    """
    # isinstance (rather than an exact type comparison) also covers
    # subclasses of nn.Linear.
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # A layer built with bias=False stores bias as None, which
        # nn.init.zeros_ cannot handle.
        if m.bias is not None:
            nn.init.zeros_(m.bias)

# apply() calls init_normal on net and on each of its submodules.
net.apply(init_normal)
# Display the first layer's weights and bias after re-initialization.
net[0].weight.data, net[0].bias.data

(tensor([[ 0.0153,  0.0012, -0.0092, -0.0004],
         [-0.0023,  0.0039, -0.0157,  0.0067],
         [ 0.0130, -0.0110,  0.0225,  0.0080],
         [ 0.0083,  0.0051, -0.0100,  0.0019],
         [ 0.0083,  0.0010, -0.0122,  0.0110],
         [-0.0015,  0.0016,  0.0059,  0.0014],
         [ 0.0034,  0.0070, -0.0194, -0.0119],
         [ 0.0149, -0.0083,  0.0023,  0.0077]]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0.]))