In [1]:
import torch
from torch.nn import functional as F
from torch import nn

# 灵活的模型构造

In [4]:
# nn.Sequential is itself a subclass of nn.Module.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
# torch.rand draws from a uniform distribution.
X = torch.rand((2, 20))
net(X)

tensor([[ 0.1167, -0.0221, -0.1987,  0.1602, -0.0675,  0.0579,  0.2181, -0.2926,
          0.1254, -0.0453],
        [ 0.0779,  0.0134, -0.0963,  0.1607,  0.0789,  0.0336,  0.2814, -0.2386,
          0.0793, -0.1339]], grad_fn=<AddmmBackward0>)

In [5]:
# Custom block
class MLP(nn.Module):
    """Two-layer perceptron: 20 inputs -> 256 hidden units (ReLU) -> 10 outputs."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Apply the hidden layer, a ReLU, then the output layer to ``X``."""
        hidden = F.relu(self.linear(X))
        return self.out(hidden)


model = MLP()
X = torch.rand((2, 20))
model(X)

tensor([[ 0.0434, -0.2509, -0.0083,  0.0282,  0.2099, -0.0040, -0.0674, -0.0427,
          0.0225,  0.1119],
        [ 0.1472, -0.2190,  0.0708,  0.0665,  0.1904,  0.0088,  0.0466, -0.0129,
          0.0568, -0.1035]], grad_fn=<AddmmBackward0>)

In [7]:
# Re-implementation of nn.Sequential
class MySequential(nn.Module):
    """Minimal nn.Sequential: chains the given modules in order.

    Args:
        *args: ``nn.Module`` instances, applied in the order given.
    """

    def __init__(self, *args):
        super().__init__()
        for idx, block in enumerate(args):
            # Child names must be strings: they are joined with "." to build
            # parameter names, so keying on the module object itself breaks
            # state_dict() and named_parameters().
            self.add_module(str(idx), block)

    def forward(self, X):
        """Feed ``X`` through every registered child, in registration order."""
        for block in self._modules.values():
            X = block(X)
        return X


net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
X = torch.rand(2, 20)
net(X)

tensor([[-6.0128e-03, -1.2707e-01,  1.2986e-01,  1.8668e-02, -4.2986e-05,
         -6.2636e-02, -4.2398e-01,  1.0492e-01, -1.2150e-01,  2.0344e-01],
        [ 1.1496e-01,  9.6769e-03,  1.2532e-01, -6.4050e-02,  3.8905e-02,
         -2.1810e-02, -2.0412e-01,  2.2321e-01, -1.0144e-01, -6.1645e-02]],
       grad_fn=<AddmmBackward0>)

In [9]:
class FixHiddenMLP(nn.Module):
    """MLP with a constant random weight matrix and Python control flow in forward().

    The same ``nn.Linear(20, 20)`` layer is applied twice, with a fixed
    (non-trainable) random 20x20 matrix multiplication in between.
    """

    def __init__(self):
        super().__init__()
        # Constant random weights that take no part in gradient updates.
        # Registering them as a buffer makes .to()/.double()/.cuda() move them
        # together with the module; persistent=False keeps the state_dict()
        # keys exactly as they were with a plain attribute.
        self.register_buffer('rand_weights', torch.rand((20, 20)), persistent=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a scalar tensor: the sum of the rescaled activations for ``X``."""
        X = self.linear(X)
        X = F.relu(torch.mm(X, self.rand_weights) + 1)
        # Second pass through the same layer object, so its parameters are reused.
        X = self.linear(X)
        # Halve the activations until the sum of absolute values is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
# X is not defined in this cell; it is reused from an earlier cell.
net = FixHiddenMLP()
net(X)

tensor(0.2392, grad_fn=<SumBackward0>)

In [10]:
#也可以嵌套nn.Sequential和nn.Linear

# 参数管理

In [11]:
# Small 4 -> 8 -> 1 network used by the parameter-management cells below.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)
X = torch.rand((2, 4))
net(X)

tensor([[0.2574],
        [0.1737]], grad_fn=<AddmmBackward0>)

In [12]:
# Parameter access
# Show the parameters held by the module at index 2 of `net`.
print(net[2].state_dict())

OrderedDict([('weight', tensor([[ 0.2118, -0.1311, -0.0670,  0.2268,  0.2373, -0.0673,  0.2884,  0.1917]])), ('bias', tensor([-0.1021]))])


In [14]:
# One line each: the bias's Python type, the Parameter itself, and its raw tensor.
print(type(net[2].bias), net[2].bias, net[2].bias.data, sep="\n")

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.1021], requires_grad=True)
tensor([-0.1021])


In [16]:
# No backward pass has been run yet, so no gradient has been stored.
# Identity check (`is None`) is the idiomatic test for None.
net[2].weight.grad is None

True

In [19]:
# List every parameter's name and shape: first for layer 0 only, then for the whole network.
# The leading * unpacks the sequence so each (name, shape) pair is passed to print separately.
print(*((name, param.shape) for name, param in net[0].named_parameters()))
print(*((name, param.shape) for name, param in net.named_parameters()))

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [20]:
# Display the full state dict of `net` (every parameter tensor, keyed by name).
net.state_dict()

OrderedDict([('0.weight',
              tensor([[ 0.4930,  0.4190, -0.2970, -0.1321],
                      [-0.3947,  0.3195, -0.2461,  0.2102],
                      [-0.1995,  0.3835,  0.2205, -0.1285],
                      [ 0.0518,  0.3629,  0.3224, -0.4075],
                      [-0.4789, -0.3011,  0.4604, -0.3170],
                      [ 0.0703,  0.1764,  0.0018,  0.0486],
                      [-0.4274,  0.4576,  0.2454,  0.2247],
                      [-0.2330, -0.0544,  0.3983,  0.4250]])),
             ('0.bias',
              tensor([ 0.1586, -0.1077, -0.1490, -0.2461,  0.1355, -0.4909, -0.0618,  0.4093])),
             ('2.weight',
              tensor([[ 0.2118, -0.1311, -0.0670,  0.2268,  0.2373, -0.0673,  0.2884,  0.1917]])),
             ('2.bias', tensor([-0.1021]))])

In [25]:
# Look up a single tensor in the state dict by its key.
net.state_dict()['2.bias'].data

tensor([-0.1021])

In [29]:
# Collecting parameters from nested blocks
def block1():
    """Build a 4 -> 8 -> 4 stack with a ReLU after each Linear layer."""
    layers = (nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU())
    return nn.Sequential(*layers)


def block2():
    """Chain four fresh ``block1()`` instances, registered as 'block0' .. 'block3'."""
    stacked = nn.Sequential()
    for i in range(4):
        stacked.add_module(f'block{i}', block1())
    return stacked


rgnet = nn.Sequential(block2(), nn.Linear(4, 1))
print(rgnet)
print(rgnet.state_dict())

Sequential(
  (0): Sequential(
    (block0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)
OrderedDict([('0.block0.0.weight', tensor([[ 0.3090,  0.0190,  0.4618,  0.0950],
        [ 0.3028, -0.0201, -0.4716,  0.1038],
        [ 0.0097,  0.4959,  

In [35]:
# Built-in initializers
def init_weight(m):
    """Initialize an ``nn.Linear``: weights ~ N(0, 0.01^2), bias set to zero.

    Written for ``Module.apply``; modules of any other type are left untouched.

    Args:
        m: The module visited by ``apply``.
    """
    # Exact type match, so subclasses of nn.Linear are still skipped as before.
    if type(m) is nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # nn.Linear(bias=False) stores its bias as None, which the initializer
        # cannot handle; skip it instead of crashing.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
# Run the initializer on every submodule, then inspect the module at index 2.
net.apply(init_weight)
(net[2].bias.data, net[2].weight.data)

(tensor([0.]),
 tensor([[ 0.0027, -0.0079, -0.0006, -0.0084,  0.0074, -0.0073,  0.0029,  0.0055]]))

In [36]:
def init_constant(m):
    """Initialize an ``nn.Linear``: every weight set to 1, bias set to zero.

    Written for ``Module.apply``; modules of any other type are left untouched.

    Args:
        m: The module visited by ``apply``.
    """
    # Exact type match, so subclasses of nn.Linear are still skipped as before.
    if type(m) is nn.Linear:
        nn.init.constant_(m.weight, 1)
        # nn.Linear(bias=False) stores its bias as None, which the initializer
        # cannot handle; skip it instead of crashing.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
# Run the constant initializer on every submodule, then show layer 0's weights.
net.apply(init_constant)
net[0].weight.data

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [40]:
# Xavier initialization
def xavier(m):
    """Apply Xavier-uniform initialization to the weight of an ``nn.Linear``.

    Modules of any other type are left untouched; the bias is not modified.
    """
    if type(m) != nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)
# Re-initialize only the module at index 0, then print its weights.
net[0].apply(xavier)
print(net[0].weight.data)

tensor([[-0.1420, -0.2103,  0.1461,  0.5881],
        [-0.5539,  0.0350,  0.6983,  0.6135],
        [-0.0249,  0.0087, -0.6010, -0.4886],
        [ 0.0863, -0.6371, -0.5282,  0.3626],
        [-0.1899,  0.2108, -0.2150,  0.5823],
        [ 0.4387, -0.2708,  0.6383,  0.1103],
        [-0.4769, -0.0934,  0.5443, -0.5920],
        [ 0.6163,  0.1458,  0.5387, -0.2086]])


In [41]:
#自定义初始化
#更简单暴力的方法，把网络中的权重拿出来直接做替换

In [44]:
# Weight sharing: the same nn.Linear object is placed at indices 2 and 7.
shared = nn.Linear(8, 8)
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    shared,
    nn.Linear(8, 16),
    nn.ReLU(),
    nn.Linear(16, 8),
    nn.ReLU(),
    shared,
    nn.ReLU(),
    nn.Linear(8, 1),
)
# Element-wise comparison of the two weight tensors.
net[2].weight.data == net[7].weight.data

tensor([[True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True]])

# 自定义层

In [50]:
class CenteredLayer(nn.Module):
    """Parameter-free layer that subtracts the mean of all elements from its input."""

    # No __init__ override: the layer has no state, so nn.Module's own
    # constructor is sufficient.

    def forward(self, X):
        """Return ``X`` minus the mean taken over all of its elements."""
        return X - X.mean()


layer = CenteredLayer()
# A single call is enough: only the last expression of a cell is displayed.
# torch.tensor replaces the legacy torch.FloatTensor constructor.
layer(torch.tensor([1.0, 2, 3, 4, 5]))

tensor([-2., -1.,  0.,  1.,  2.])

In [54]:
# Use the custom layer between two Linear layers.
net = nn.Sequential(
    nn.Linear(8, 128),
    CenteredLayer(),
    nn.Linear(128, 1),
)
X = torch.rand((4, 8))
net(X)

tensor([[0.0365],
        [0.1579],
        [0.0542],
        [0.0647]], grad_fn=<AddmmBackward0>)

In [60]:
# Layer with parameters
class MyLinear(nn.Module):
    """Fully connected layer followed by ReLU, with explicitly declared parameters.

    Args:
        in_units: Size of the last dimension of the input.
        out_units: Number of output features.
    """

    def __init__(self, in_units, out_units):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(in_units, out_units))
        self.bias = nn.Parameter(torch.zeros(out_units))

    def forward(self, X):
        """Return ``relu(X @ weights + bias)``."""
        # Use the Parameters themselves: going through ``.data`` detaches them
        # from autograd, so no gradient would ever reach weights or bias.
        # matmul (instead of mm) also accepts inputs with leading batch dimensions.
        y = torch.matmul(X, self.weights) + self.bias
        return F.relu(y)


dense = MyLinear(5, 3)
dense(torch.rand(2, 5))

tensor([[2.0266, 0.0000, 0.0000],
        [2.0189, 1.7062, 0.3353]])

In [62]:
# Build a model out of the custom layer.
net = nn.Sequential(
    MyLinear(4, 16),
    nn.Linear(16, 1),
)
net(torch.rand((5, 4)))

tensor([[ 0.2045],
        [-0.0153],
        [ 0.0097],
        [ 0.2535],
        [-0.0016]], grad_fn=<AddmmBackward0>)

# 保存文件

In [65]:
# Round-trip a single tensor through a file in the working directory.
x = torch.arange(0, 6)
torch.save(x, 'x_file')
# NOTE(review): torch.load unpickles the file; only load files you trust.
x1 = torch.load('x_file')
x1

tensor([0, 1, 2, 3, 4, 5])

In [67]:
# Save a list of tensors and read it back.
y = torch.zeros((3,))
torch.save([x, y], 'xy_file')
x1, y1 = torch.load('xy_file')
(x1, y1)

(tensor([0, 1, 2, 3, 4, 5]), tensor([0., 0., 0.]))

In [68]:
# Save a dictionary of tensors and read it back.
# Named `tensor_dict` rather than `dict`, so the builtin `dict` is not shadowed
# for the rest of the kernel session.
tensor_dict = {'x': x, 'y': y}
torch.save(tensor_dict, 'mydict')
mydict = torch.load('mydict')
mydict

{'x': tensor([0, 1, 2, 3, 4, 5]), 'y': tensor([0., 0., 0.])}

In [70]:
# Saving and loading model parameters
# NOTE: this redefines the MLP class from an earlier cell, with different
# attribute names (linear1/linear2).
class MLP(nn.Module):
    """Two-layer perceptron: 20 inputs -> 256 hidden units (ReLU) -> 10 outputs."""

    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(20, 256)
        self.linear2 = nn.Linear(256, 10)

    def forward(self, X):
        """Apply linear1, a ReLU, then linear2 to ``X``."""
        hidden = F.relu(self.linear1(X))
        return self.linear2(hidden)


net = MLP()
X = torch.rand((2, 20))
Y = net(X)
# Only the parameters (state dict) are written, not the model object itself.
torch.save(net.state_dict(), 'mlp.params')

In [72]:
# Rebuild the architecture, then restore the saved parameters into it.
clone = MLP()
# NOTE(review): torch.load unpickles the file; only load files you trust.
clone.load_state_dict(torch.load('mlp.params'))
Y_clone = clone(X)
# Element-wise comparison of the clone's output with the original output.
Y_clone == Y

tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])