In [1]:
import torch
from torch import nn
from torch.nn import init

In [3]:
# Constructing the layers already initializes their parameters with the defaults.
layers = [nn.Linear(4, 3), nn.ReLU(), nn.Linear(3, 1)]
net = nn.Sequential(*layers)
print(net)

Sequential(
  (0): Linear(in_features=4, out_features=3, bias=True)
  (1): ReLU()
  (2): Linear(in_features=3, out_features=1, bias=True)
)


#### 访问参数

In [4]:
# named_parameters() is lazy: check what kind of object it hands back.
params_iter = net.named_parameters()
print(type(params_iter))

<class 'generator'>


In [5]:
# Show the qualified name and shape of every learnable tensor in the network.
for pname, tensor in net.named_parameters():
    print(pname, tensor.shape)

0.weight torch.Size([3, 4])
0.bias torch.Size([3])
2.weight torch.Size([1, 3])
2.bias torch.Size([1])


**以Sequential类构造的网络可以用[]来访问网络的层**

In [6]:
# Indexing the Sequential selects one layer; inspect only layer 0's parameters.
first_layer = net[0]
for pname, tensor in first_layer.named_parameters():
    print(pname, tensor.shape, type(tensor))

weight torch.Size([3, 4]) <class 'torch.nn.parameter.Parameter'>
bias torch.Size([3]) <class 'torch.nn.parameter.Parameter'>


torch.nn.parameter.Parameter是Tensor的`子类`<br>
两者区别：<br>
**Parameter被赋值为Module的属性时，会被自动添加到该模型的参数列表中；普通Tensor则不会**

#### 初始化模型参数

In [8]:
# Re-draw every weight matrix from a normal distribution N(0, 0.01^2);
# parameters whose name does not contain 'weight' are skipped.
for pname, tensor in net.named_parameters():
    if 'weight' not in pname:
        continue
    init.normal_(tensor, mean=0, std=0.01)
    print(pname, tensor.data)

0.weight tensor([[-1.3044e-02,  5.6029e-03,  2.9724e-04, -1.6938e-05],
        [-6.5446e-03,  7.8678e-03,  9.2060e-03, -9.6625e-03],
        [-1.7352e-04,  6.7165e-03,  9.7142e-03, -4.5873e-03]])
2.weight tensor([[-0.0026,  0.0079, -0.0050]])


In [11]:
# Set every bias vector to the constant 0; the weights are left untouched.
bias_params = [(pname, tensor) for pname, tensor in net.named_parameters() if 'bias' in pname]
for pname, tensor in bias_params:
    init.constant_(tensor, val=0)
    print(pname, tensor.data)

0.bias tensor([0., 0., 0.])
2.bias tensor([0.])


#### **自定义初始化权重**？

定义一个函数直接对tensor做原地修改，并用`torch.no_grad()`保证这些操作不被记录梯度

类似地，我们来实现一个自定义的初始化方法。在下面的例子里，我们令权重有一半概率初始化为0，有另一半概率初始化为$[-10,-5]$和$[5,10]$两个区间里均匀分布的随机数。

In [13]:
def init_weight_(tensor):
    """Initialize ``tensor`` in place with a sparse, large-magnitude distribution.

    Every element is first drawn uniformly from [-10, 10); each draw whose
    absolute value is below 5 is then zeroed. Roughly half of the entries
    therefore end up as 0 and the rest are uniform over [-10, -5] and [5, 10).

    Args:
        tensor: Floating-point tensor (or ``nn.Parameter``) to overwrite.

    Returns:
        The same ``tensor`` object, so the call can be chained like the
        built-in ``torch.nn.init`` in-place initializers.
    """
    # Initialization must not be tracked by autograd; without no_grad the
    # in-place writes on a leaf that requires grad would raise.
    with torch.no_grad():
        tensor.uniform_(-10, 10)
        # Boolean mask -> 0./1. multiplier: keep |value| >= 5, zero the rest.
        tensor *= (tensor.abs() >= 5).float()
    return tensor

#### **共享模型参数**

在`Sequential`中多次传入同一个层对象即可：这些位置共用同一份参数

In [15]:
# One bias-free 1x1 linear layer, placed at three positions of the network so
# that all three positions use the very same weight tensor.
linear = nn.Linear(1, 1, bias=False)
net = nn.Sequential(*(linear for _ in range(3)))
print(net)
# Fill each parameter reported by the network with the constant 3 and show it.
for pname, tensor in net.named_parameters():
    init.constant_(tensor, val=3)
    print(pname, tensor.data)


Sequential(
  (0): Linear(in_features=1, out_features=1, bias=False)
  (1): Linear(in_features=1, out_features=1, bias=False)
  (2): Linear(in_features=1, out_features=1, bias=False)
)
0.weight tensor([[3.]])


In [18]:
# Push a single all-ones input through the network and reduce the result to a scalar.
x = torch.ones((1, 1))
y = torch.sum(net(x))
print(y)

tensor(27., grad_fn=<SumBackward0>)


In [19]:
# Backpropagate from the scalar y so that the parameters it depends on get their .grad filled in.
y.backward()

In [20]:
# The three layers share a single weight w = 3, so y = w**3 * x with x = 1 and
# dy/dw = 3 * w**2 = 27: the gradient of each of the three uses is accumulated
# into the same tensor.
print(net[0].weight.grad)

tensor([[27.]])
