In [2]:
import torch
from torch import nn

In [3]:
# Small MLP: 4 input features -> 8 hidden units (ReLU) -> 1 output.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)
# A batch of 2 samples with 4 features each, drawn uniformly from [0, 1).
X = torch.rand(2, 4)
net(X)

tensor([[0.1068],
        [0.0291]], grad_fn=<AddmmBackward>)

## 参数访问

In [4]:
# Show the whole network and its state dict, then the same two views for
# the last layer only. One object per output line.
print(net, net.state_dict(), net[2], net[2].state_dict(), sep="\n")

Sequential(
  (0): Linear(in_features=4, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
)
OrderedDict([('0.weight', tensor([[ 0.3343,  0.2604,  0.3563,  0.4492],
        [ 0.1590,  0.3543,  0.4599, -0.1437],
        [ 0.2245, -0.3005,  0.1645, -0.3769],
        [ 0.1353, -0.1557, -0.3814, -0.1598],
        [ 0.2827, -0.4184, -0.0987,  0.1361],
        [-0.4335,  0.2148,  0.4989,  0.2369],
        [ 0.3586,  0.0969, -0.3433,  0.4620],
        [-0.1466,  0.1045,  0.2789, -0.4877]])), ('0.bias', tensor([-0.4881, -0.4485, -0.0498, -0.3285,  0.1296,  0.2890, -0.3478,  0.2077])), ('2.weight', tensor([[ 0.0058,  0.1316, -0.2567,  0.1780, -0.2821, -0.1498,  0.3002, -0.1276]])), ('2.bias', tensor([0.1736]))])
Linear(in_features=8, out_features=1, bias=True)
OrderedDict([('weight', tensor([[ 0.0058,  0.1316, -0.2567,  0.1780, -0.2821, -0.1498,  0.3002, -0.1276]])), ('bias', tensor([0.1736]))])


In [5]:
# Both the bias and the weight of a layer are nn.Parameter objects.
print(type(net[2].bias), type(net[2].weight), sep="\n")

<class 'torch.nn.parameter.Parameter'>
<class 'torch.nn.parameter.Parameter'>


In [6]:
# Printing a Parameter shows its values plus requires_grad;
# `.data` shows the underlying tensor values on their own.
print(net[2].weight, net[2].bias, net[2].weight.data, sep="\n")

Parameter containing:
tensor([[ 0.0058,  0.1316, -0.2567,  0.1780, -0.2821, -0.1498,  0.3002, -0.1276]],
       requires_grad=True)
Parameter containing:
tensor([0.1736], requires_grad=True)
tensor([[ 0.0058,  0.1316, -0.2567,  0.1780, -0.2821, -0.1498,  0.3002, -0.1276]])


In [7]:
# `.grad` stays None until a backward pass has populated it.
# Use an identity test: `==` is overloaded on tensors, so `grad == None`
# is not a reliable way to ask "is there a gradient yet?".
net[2].weight.grad is None

True

In [8]:
# (name, shape) pairs for the first layer only, then for the whole network.
# At network level each name is prefixed with the index of the owning layer.
print(*((pname, p.shape) for pname, p in net[0].named_parameters()))
print(*((pname, p.shape) for pname, p in net.named_parameters()))

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [9]:
# state_dict() keys have the form '<layer index>.<parameter name>',
# so '2.bias' addresses the bias of net[2].
net.state_dict()['2.bias'].data

tensor([0.1736])

In [10]:
def block1():
    """Build one 4 -> 8 -> 4 block with a ReLU after each linear layer.

    Returns:
        A new ``nn.Sequential`` whose layers are created on every call.
    """
    layers = [
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)

def block2(num_blocks=4):
    """Chain several ``block1`` instances inside one ``nn.Sequential``.

    Args:
        num_blocks: How many blocks to stack. Defaults to 4, which was the
            previously hard-coded depth.

    Returns:
        An ``nn.Sequential`` whose children are registered under the names
        '0', '1', ..., one per call to ``block1()``.
    """
    net = nn.Sequential()
    for i in range(num_blocks):
        # add_module registers the child under an explicit string name.
        net.add_module(str(i), block1())
    return net

# The stacked blocks from block2() followed by a 4 -> 1 linear head.
rgnet = nn.Sequential(
    block2(),
    nn.Linear(4, 1),
)
rgnet(X)

tensor([[-0.2649],
        [-0.2649]], grad_fn=<AddmmBackward>)

In [11]:
# Each extra index goes one level deeper into the nested containers.
print(type(rgnet[0]), type(rgnet[0][0]), type(rgnet[0][0][0]), sep="\n")

<class 'torch.nn.modules.container.Sequential'>
<class 'torch.nn.modules.container.Sequential'>
<class 'torch.nn.modules.linear.Linear'>


In [12]:
# The structure is simply nn.Sequential containers nested inside one another:
# print the full network, then its first inner block.
print(rgnet, rgnet[0][0], sep="\n")

Sequential(
  (0): Sequential(
    (0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)
Sequential(
  (0): Linear(in_features=4, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=4, bias=True)
  (3): ReLU()
)


In [13]:
# Index level by level: rgnet[0] is the stack of blocks, the next [0] its
# first block, and the last [0] that block's first Linear layer.
print(rgnet[0][0][0].weight.data)

tensor([[ 0.0637, -0.2935,  0.0560, -0.0344],
        [ 0.0405,  0.2113, -0.1617,  0.3412],
        [ 0.2150, -0.2047, -0.1454,  0.0557],
        [-0.2039, -0.0025, -0.2686, -0.2386],
        [ 0.1435,  0.1566, -0.1098, -0.0633],
        [ 0.1894, -0.4058, -0.3307,  0.4768],
        [-0.3513,  0.2465,  0.3223,  0.2580],
        [-0.1680, -0.1666,  0.4254, -0.3404]])


## 参数初始化

In [14]:
def init_normal(m):
    """Re-initialise a linear layer with N(0, 0.01^2) weights and a zero bias.

    Written for ``nn.Module.apply``: modules that are not linear layers are
    left untouched.

    Args:
        m: The module to (possibly) re-initialise in place.
    """
    if isinstance(m, nn.Linear):
        # A trailing underscore marks an in-place function: the tensor is
        # modified directly, so the result does not need to be assigned back.
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # A layer created with bias=False has `m.bias` set to None.
        if m.bias is not None:
            nn.init.zeros_(m.bias)

# Apply the initialiser to every layer of the network.
net.apply(init_normal)
# First row of the first layer's weight matrix.
net[0].weight.data[0]

tensor([ 0.0105,  0.0028,  0.0064, -0.0014])

In [15]:
# Initialise all parameters to 1.
def init_constant(m):
    """Set every weight (and the bias, if there is one) of a linear layer to 1.

    Written for ``nn.Module.apply``: modules that are not linear layers are
    left untouched.

    Args:
        m: The module to (possibly) re-initialise in place.
    """
    if isinstance(m, nn.Linear):
        nn.init.constant_(m.weight, 1)
        # A layer created with bias=False has `m.bias` set to None.
        if m.bias is not None:
            nn.init.constant_(m.bias, 1)

# Apply the constant initialiser to every layer of the network.
net.apply(init_constant)
# First weight row and first bias entry of layer 0.
net[0].weight.data[0], net[0].bias.data[0]

(tensor([1., 1., 1., 1.]), tensor(1.))

In [16]:
def init_xavier(m):
    """Re-initialise the weight of a linear layer with Xavier-uniform values.

    The bias is not modified. Modules that are not linear layers are left
    untouched, so the function can be passed to ``nn.Module.apply``.

    Args:
        m: The module to (possibly) re-initialise in place.
    """
    # isinstance also matches subclasses of nn.Linear, which an exact
    # type comparison would silently skip.
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
    
def init_42(m):
    """Set every weight of a linear layer to the constant 42.

    The bias is not modified. Modules that are not linear layers are left
    untouched, so the function can be passed to ``nn.Module.apply``.

    Args:
        m: The module to (possibly) re-initialise in place.
    """
    # isinstance also matches subclasses of nn.Linear, which an exact
    # type comparison would silently skip.
    if isinstance(m, nn.Linear):
        nn.init.constant_(m.weight, 42)
    
# Different initialisers for different layers: Xavier-uniform for the first
# linear layer, the constant 42 for the last one.
net[0].apply(init_xavier)
net[2].apply(init_42)
# First weight row of each of the two layers, one per output line.
print(net[0].weight.data[0], net[2].weight.data[0], sep="\n")

tensor([ 0.2057,  0.1676, -0.2719, -0.4589])
tensor([42., 42., 42., 42., 42., 42., 42., 42.])


## 自定义初始化

In [24]:
def my_init(m):
    """Custom initialiser: uniform weights in [-10, 10], small ones zeroed.

    For a linear layer, prints the name and shape of its first registered
    parameter, draws the weights uniformly from [-10, 10], and then sets
    every weight whose absolute value is below 5 to zero. The bias is not
    modified. Other module types are left untouched.

    Args:
        m: The module to (possibly) re-initialise in place.
    """
    if isinstance(m, nn.Linear):
        # Only the first (name, parameter) pair is reported, so take it
        # straight from the iterator instead of building the whole list.
        name, param = next(iter(m.named_parameters()))
        print("Init", name, param.shape)
        nn.init.uniform_(m.weight, -10, 10)
        # The boolean mask acts as 0/1 in the product: entries with
        # |w| < 5 become zero, the rest keep their value. no_grad() keeps
        # this in-place edit out of autograd without going through `.data`.
        with torch.no_grad():
            m.weight.mul_(m.weight.abs() >= 5)

# Run the custom initialiser on every layer; it prints one "Init" line per
# linear layer it touches.
net.apply(my_init)
# Weight of the last layer after the custom initialisation.
net[2].weight

Init weight torch.Size([8, 4])
Init weight torch.Size([1, 8])


Parameter containing:
tensor([[ 6.2417,  0.0000, -6.6060, -0.0000, -0.0000,  6.4868, -0.0000, -0.0000]],
       requires_grad=True)

In [28]:
# Parameters can also be edited directly through `.data`.
# NOTE: this cell is not idempotent; every re-run adds another 1 to each weight.
net[0].weight.data[:] += 1
# Overwrite a single entry (row 0, column 0) after the increment.
net[0].weight.data[0,0] = 42
net[0].weight.data

tensor([[42.0000,  4.0000,  4.0000,  4.0000],
        [ 4.0000,  4.0000,  4.0000, 12.4120],
        [ 4.0000,  4.0000,  4.0000, 12.1799],
        [ 4.0000,  4.0000, 13.2496,  4.0000],
        [ 9.8269,  4.0000, -4.2116,  4.0000],
        [-5.0901,  4.0000, -1.5624,  4.0000],
        [ 4.0000,  4.0000,  9.9727,  4.0000],
        [-4.2487,  4.0000, 11.4649,  4.0000]])

## 参数绑定

In [None]:
# One layer instance placed at two positions of the network, so both
# positions refer to the same parameters.
shared = nn.Linear(8, 8)
net = nn.Sequential(
    nn.Linear(4, 8), nn.ReLU(),
    shared, nn.ReLU(),
    shared, nn.ReLU(),
    nn.Linear(8, 1),
)
net(X)
# Check that the parameters are equal.
print(net[2].weight.data[0] == net[4].weight.data[0])
net[2].weight.data[0, 0] = 100
# Make sure they are actually the same object and do not merely hold equal
# values: the change made through net[2] must also be visible through net[4].
print(net[2].weight.data[0] == net[4].weight.data[0])