In [24]:
import torch 
from torch import nn
from torch.nn import functional as F 

net=nn.Sequential(
    nn.Linear(20,256),nn.ReLU(),
    nn.Linear(256,10),
    )


In [65]:
X=torch.rand(2,20)
net(X)

tensor(-0.2307, grad_fn=<SumBackward0>)

In [26]:
class MLP(nn.Module):
    # 初始化
    def __init__(self):
        super().__init__()
        self.hidden=nn.Linear(20,256)
        self.out=nn.Linear(256,10)
        
    # 前向传播
    def forward(self,X):
        return self.out(F.relu(self.hidden(X)))

In [27]:
net=MLP()
net(X)

tensor([[ 0.1183,  0.1793, -0.0641,  0.2207, -0.0670,  0.1596,  0.0288, -0.0552,
          0.2332, -0.0858],
        [-0.0982,  0.0153, -0.1551,  0.1446, -0.0291, -0.0869, -0.0237,  0.0318,
          0.3343,  0.0980]], grad_fn=<AddmmBackward>)

In [28]:
class MySequential(nn.Module):
    def __init__(self,*args):
        super().__init__()
        for block in args:
            # 有序字典 OrderedDict
            self._modules[block]=block
            
    def forward(self,X):
        for block in self._modules.values():
            X=block(X)
        return X

In [29]:
net=MySequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))
net(X)

tensor([[-0.1367,  0.0079,  0.0159,  0.2326, -0.0025, -0.0241,  0.0326, -0.0012,
         -0.0854,  0.1710],
        [-0.1701,  0.1012,  0.0217,  0.2800, -0.1296,  0.0390,  0.1010,  0.0321,
         -0.0360,  0.1450]], grad_fn=<AddmmBackward>)

In [66]:
class FixedHiddenMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.rand_weight=torch.rand((20,20),requires_grad=False)
        self.linear=nn.Linear(20,20)
        
    def forward(self,X):
        X=self.linear(X)
        X=F.relu(torch.mm(X,self.rand_weight)+1)
        X=self.linear(X)
        while X.abs().sum()>1:
            X/=2
        return X.sum()
    

In [67]:
net=FixedHiddenMLP()
net(X)

tensor(-0.1224, grad_fn=<SumBackward0>)

In [69]:
net.state_dict()
net.share_memory()

FixedHiddenMLP(
  (linear): Linear(in_features=20, out_features=20, bias=True)
)

In [34]:
class NestMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.net=nn.Sequential(
            nn.Linear(20,64),nn.ReLU(),
            nn.Linear(64,32),nn.ReLU(),
        )
        self.linear=nn.Linear(32,16)
        
    def forward(self,X):
        return self.linear(self.net(X))

In [35]:
chimera=nn.Sequential(NestMLP(),nn.Linear(16,20),FixedHiddenMLP())
chimera(X)

tensor(0.0883, grad_fn=<SumBackward0>)

In [71]:
# 参数管理
net=nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,1))
X=torch.rand(size=(2,4))
net(X)
net.share_memory()

Sequential(
  (0): Linear(in_features=4, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
)

In [37]:
# 参数访问
print(net[2].state_dict())

OrderedDict([('weight', tensor([[ 0.1038,  0.2160, -0.1391,  0.2499, -0.0145, -0.0202, -0.1730,  0.1404]])), ('bias', tensor([-0.0140]))])


In [38]:
print(type(net[2].bias))
print(net[2].bias)
print(net[2].bias.data)

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.0140], requires_grad=True)
tensor([-0.0140])


In [39]:
net[2].weight.grad==None

True

In [44]:
print(*[(name,param.shape) for name,param in net[0].named_parameters()])
print(*[(name,param.shape)for name,param in net.named_parameters()])

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [45]:
net.state_dict()["2.bias"].data

tensor([-0.0140])

In [46]:
def block1():
    return nn.Sequential(
        nn.Linear(4,8),nn.ReLU(),
        nn.Linear(8,4),nn.ReLU(),
    )
    
def block2():
    net=nn.Sequential()
    for i in range(4):
        net.add_module(f"block {i}",block1())
    return net

In [47]:
rgnet=nn.Sequential(block2(),nn.Linear(4,1))
rgnet(X)

tensor([[0.3979],
        [0.3979]], grad_fn=<AddmmBackward>)

In [48]:
rgnet.state_dict()

OrderedDict([('0.block 0.0.weight',
              tensor([[ 0.3876, -0.0572, -0.3471, -0.0484],
                      [ 0.0251,  0.2788,  0.2427, -0.2249],
                      [ 0.0743,  0.1438, -0.2229, -0.1171],
                      [-0.1282, -0.4655, -0.0473, -0.1753],
                      [ 0.4167, -0.4027, -0.1831, -0.0505],
                      [-0.2240,  0.0078,  0.3060,  0.0972],
                      [ 0.1305,  0.4635, -0.1303,  0.3904],
                      [ 0.3158,  0.3299, -0.1738, -0.4543]])),
             ('0.block 0.0.bias',
              tensor([ 0.0499,  0.1651,  0.1781,  0.3592,  0.1308,  0.3764,  0.4786, -0.3723])),
             ('0.block 0.2.weight',
              tensor([[ 0.2558,  0.0488, -0.0215,  0.0732,  0.3301, -0.0171,  0.1772, -0.0424],
                      [ 0.1130,  0.0296,  0.2470, -0.0065, -0.2356,  0.0540,  0.0213,  0.1732],
                      [ 0.0119,  0.0143,  0.1322,  0.2425,  0.1184,  0.1317, -0.2126, -0.0495],
                      [-0.

In [49]:
print(rgnet)

Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [50]:
# 复杂网络模块化

In [51]:
# 内置初始化
def init_normal(m):
    if type(m)==nn.Linear:
        # 或者 isinstance
        nn.init.normal_(m.weight,mean=0,std=0.01)
        nn.init.zeros_(m.bias)

In [52]:
net.apply(init_normal)
net[0].weight.data,net[0].bias.data

(tensor([[-0.0101, -0.0033, -0.0013, -0.0129],
         [-0.0083,  0.0019, -0.0084,  0.0080],
         [ 0.0014, -0.0041, -0.0034, -0.0008],
         [ 0.0030,  0.0005,  0.0127,  0.0141],
         [-0.0071,  0.0102, -0.0084, -0.0166],
         [-0.0284, -0.0002, -0.0147,  0.0034],
         [-0.0064, -0.0113, -0.0029, -0.0078],
         [ 0.0011,  0.0131, -0.0061,  0.0194]]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0.]))

In [53]:
def init_constant(m):
    if isinstance(m,nn.Linear):
        nn.init.constant_(m.weight,1)
        nn.init.zeros_(m.bias)



In [54]:
net.apply(init_constant)
net[0].weight.data, net[0].bias.data


(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0.]))

In [55]:
def xavier(m):
    if isinstance(m,nn.Linear):
        nn.init.xavier_uniform_(m.weight)

def init_42(m):
    if isinstance(m,nn.Linear):
        nn.init.constant_(m.weight,42)

In [56]:
net[0].apply(xavier)
net[2].apply(init_42)
net[0].weight.data,net[2].weight.data

(tensor([[-0.7031,  0.5043,  0.3071,  0.6540],
         [ 0.0689,  0.2009,  0.3191,  0.0474],
         [ 0.1849,  0.5142, -0.1172, -0.6657],
         [-0.3467,  0.6593,  0.0093,  0.6041],
         [-0.0231,  0.4370,  0.6087,  0.2841],
         [ 0.4364,  0.5877, -0.3274,  0.5382],
         [-0.1846, -0.0218,  0.5533, -0.2661],
         [ 0.3078,  0.5764, -0.6570,  0.4341]]),
 tensor([[42., 42., 42., 42., 42., 42., 42., 42.]]))

In [57]:
def my_init(m):
    if isinstance(m,nn.Linear):
        print(
            "Init",
            *[(name,param.shape) for name,param in m.named_parameters()][0]
        )
        nn.init.uniform_(m.weight,-10,10)
        m.weight.data*=m.weight.data.abs()>=5


In [58]:
net.apply(my_init)
net[0].weight

Init weight torch.Size([8, 4])
Init weight torch.Size([1, 8])


Parameter containing:
tensor([[ 0.0000, -9.1281,  7.3678, -0.0000],
        [-0.0000,  7.3443, -7.0752,  0.0000],
        [ 7.8845, -0.0000, -0.0000, -0.0000],
        [-0.0000,  0.0000, -5.6070, -0.0000],
        [ 0.0000,  5.2102,  0.0000, -6.3700],
        [-0.0000, -9.8299, -0.0000, -0.0000],
        [-0.0000,  0.0000,  7.0766, -7.2053],
        [-0.0000, -0.0000, -6.1195,  0.0000]], requires_grad=True)

In [61]:
net[0].weight.data[:]+=1
net[0].weight.data[0][0]=42
net[0].weight.data[0]

tensor([42.0000, -6.1281, 10.3678,  3.0000])

In [72]:
# 参数绑定
# 相当于同一个实例
shared=nn.Linear(8,8)
net=nn.Sequential(nn.Linear(4,8),nn.ReLU(),
                  shared,nn.ReLU(),shared,
                  nn.ReLU(),nn.Linear(8,1))

In [74]:
net(X)
print(net[2].weight.data==net[4].weight.data)

tensor([[True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True]])


In [75]:
net[2].weight.data[0,0]=100
print(net[2].weight.data==net[4].weight.data)

tensor([[True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True]])


In [76]:
# 自定义层
# 自定义层和自定义网络没有区别 因为都是nn.Module的子类
class CenteredLayer(nn.Module):
    def __init__(self):
        super().__init__()
        
    def forward(self,X):
        return X-X.mean()

In [78]:
layer=CenteredLayer()
layer(torch.FloatTensor([1,2,3,4,5]))

tensor([-2., -1.,  0.,  1.,  2.])

In [79]:
net=nn.Sequential(nn.Linear(8,128,CenteredLayer()))

In [80]:
Y=net(torch.rand(4,8))
Y.mean()

tensor(-0.0184, grad_fn=<MeanBackward0>)

In [81]:
class MyLinear(nn.Module):
    def __init__(self,in_units,units):
        super().__init__()
        # 必须使用nn.Parameter
        self.weight=nn.Parameter(torch.randn(in_units,units))
        self.bias=nn.Parameter(torch.randn(units,))
        
    def forward(self,X):
        linear=torch.mm(X,self.weight.data)+self.bias.data
        return F.relu(linear)

In [91]:
dense=MyLinear(5,3)
dense.weight

Parameter containing:
tensor([[-1.0934, -1.2295,  0.4299],
        [ 1.6728, -0.5149, -0.0973],
        [-0.1699, -1.1048, -0.1938],
        [ 2.0758, -1.5320,  2.0393],
        [ 0.1195,  0.6849,  1.8487]], requires_grad=True)

In [92]:
dense(torch.rand(2,5))

tensor([[0.0000, 0.0000, 1.1714],
        [1.5484, 0.0000, 1.9976]])

In [93]:
net=nn.Sequential(MyLinear(64,8),MyLinear(8,1))
net(torch.rand(2,64))

tensor([[0.],
        [0.]])

In [94]:
# 读写文件
x=torch.arange(4)
torch.save(x,"x-file")

x2=torch.load("x-file")
x2

tensor([0, 1, 2, 3])

In [95]:
y=torch.zeros(4)
torch.save([x,y],"x-files")
x2,y2=torch.load("x-files")
[x2,y2]

[tensor([0, 1, 2, 3]), tensor([0., 0., 0., 0.])]

In [96]:
mydict={"x":x,"y":y}
torch.save(mydict,"mydict")
mydict2=torch.load("mydict")
mydict2

{'x': tensor([0, 1, 2, 3]), 'y': tensor([0., 0., 0., 0.])}

In [97]:
# 加载和保存模型参数
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden=nn.Linear(20,256)
        self.output=nn.Linear(256,10)
        
    def forward(self,x):
        return self.output(F.relu(self.hidden(x)))
    

In [98]:
net=MLP()
X=torch.randn(size=(2,20))
Y=net(X)

In [99]:
# torch不方便把模型定义存下来
# 只要存权重
# 存权重字典即可
torch.save(net.state_dict(),"mlp.params")


In [100]:
clone=MLP()
clone.load_state_dict(torch.load("mlp.params"))
clone.eval()

MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)

In [101]:
Y_clone=clone(X)
Y_clone==Y

tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])