## 5.1 层和块

In [1]:
import torch
from torch import nn
from torch.nn import functional as F

In [2]:
# Baseline network built from stock layers: 20 inputs -> 256 hidden units
# (ReLU) -> 10 outputs. nn.Sequential applies its children in the order given.
net=nn.Sequential(
    nn.Linear(20,256),
    nn.ReLU(),
    nn.Linear(256,10),
)

In [3]:
# Random input of shape (2, 20): two samples, 20 features each (values drawn
# uniformly from [0, 1)), pushed through the network defined above.
X=torch.rand(2,20)
net(X)

tensor([[ 0.0760,  0.0979, -0.1677,  0.2275,  0.4244,  0.0422,  0.0832,  0.2170,
         -0.0620, -0.0627],
        [ 0.0496,  0.1505, -0.1536,  0.0427,  0.2895,  0.0340,  0.0113,  0.1495,
          0.0305, -0.0661]], grad_fn=<AddmmBackward>)

### 5.1.1 自定义块

In [4]:
## Parameters: storage, access, initialization
## Gradients: gradient computation
## Forward propagation
class MLP(nn.Module):
    """Multilayer perceptron with one hidden layer.

    Architecture: Linear(20, 256) -> ReLU -> Linear(256, 10).
    """

    def __init__(self):
        super().__init__()
        # Hidden layer followed by the output layer; assigning nn.Module
        # instances as attributes registers them (and their parameters).
        self.hidden=nn.Linear(20,256)
        self.out=nn.Linear(256,10)

    def forward(self,X):
        """Compute the network output for input ``X`` (last dimension 20)."""
        hidden_activation=F.relu(self.hidden(X))
        return self.out(hidden_activation)

In [5]:
# Instantiate the custom MLP block and run it on the batch X created earlier.
net=MLP()
net(X)

tensor([[ 0.0060, -0.0126, -0.2154, -0.0546, -0.1649,  0.1012, -0.0039,  0.0295,
         -0.0916,  0.3711],
        [-0.0424,  0.0529, -0.0647, -0.0787, -0.2735,  0.0818,  0.0110,  0.0171,
         -0.1743,  0.4050]], grad_fn=<AddmmBackward>)

### 5.1.2 顺序块

In [6]:
class MySequential(nn.Module):
    '''Minimal re-implementation of ``nn.Sequential``.

    1. Registers the given blocks, in order, as child modules.
    2. Provides a forward pass that feeds each block's output to the next.

    Args:
        *args: ``nn.Module`` instances, applied in the order given. The same
            instance may appear more than once; it is then applied once per
            occurrence.
    '''
    def __init__(self,*args):
        super().__init__()
        # Every nn.Module keeps its children in `_modules`, an ordered dict
        # keyed by name. Names must be strings: keying by the module object
        # itself breaks state_dict()/named_modules() (they concatenate names
        # into dotted paths) and silently drops repeated instances.
        for idx,block in enumerate(args):
            self.add_module(str(idx),block)

    def forward(self,X):
        '''Apply every registered block to ``X`` in registration order.'''
        # Iterate `_modules` directly rather than children(): children()
        # de-duplicates, which would skip a block that was passed twice.
        for block in self._modules.values():
            X=block(X)
        return X

In [7]:
# Rebuild the 20 -> 256 -> 10 network using the hand-written MySequential container.
net=MySequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))

In [8]:
# Forward pass through the MySequential network on the batch X.
net(X)

tensor([[-0.1463, -0.0703, -0.1245,  0.2888, -0.1128, -0.1945, -0.1304,  0.0710,
         -0.0110,  0.1994],
        [-0.1599, -0.1476, -0.1573,  0.2782, -0.0534, -0.0677, -0.0601, -0.0178,
         -0.0494,  0.1093]], grad_fn=<AddmmBackward>)

### 5.1.3 在正向传播中执行代码

In [9]:
class FixedHiddenMLP(nn.Module):
    """Block whose forward pass mixes learned layers, constant weights and
    ordinary Python control flow.

    The single ``nn.Linear(20, 20)`` layer is applied twice (so both uses
    share parameters); in between, the activations are multiplied by a fixed
    random matrix that is never trained.
    """

    def __init__(self):
        # `super()` must be called to get the proxy object; `super.__init__()`
        # without the parentheses fails before nn.Module is initialised.
        super().__init__()
        # Constant random weights: a plain tensor, not an nn.Parameter, so it
        # is not returned by parameters() and receives no gradient.
        self.rand_weight=torch.rand((20,20),requires_grad=False)
        self.linear=nn.Linear(20,20)

    def forward(self,X):
        """Return a scalar tensor computed from ``X``.

        ``X`` must be 2-D with 20 columns, since ``torch.mm`` and the
        ``Linear(20, 20)`` layer are applied to it.
        """
        X=self.linear(X)
        X=F.relu(torch.mm(X,self.rand_weight)+1)
        # Reuse the same layer: the two applications share parameters.
        X=self.linear(X)
        # Arbitrary control flow inside forward: halve X until the sum of
        # absolute values is at most 1. `abs` is a method and must be called.
        while X.abs().sum()>1:
            X/=2
        return X.sum()

## 5.2 参数管理

本节内容：参数访问、参数初始化、参数绑定。

In [10]:
import torch
from torch import nn

In [11]:
# Small MLP used throughout this section: 4 inputs -> 8 hidden units (ReLU) -> 1 output.
net=nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,1))

In [12]:
# Random input batch of shape (2, 4): two samples with four features each.
X=torch.rand(size=(2,4))

In [13]:
# Forward pass; the result has one output value per input sample.
net(X)

tensor([[-0.0268],
        [-0.1702]], grad_fn=<AddmmBackward>)

### 5.2.1 参数访问

In [14]:
## When a model is defined with the Sequential class, any layer can be accessed by index.
## net[2] is the final Linear layer; state_dict() maps parameter names to tensors.
print(net[2].state_dict())

OrderedDict([('weight', tensor([[ 0.3529,  0.2783, -0.2410, -0.2118, -0.0969, -0.0626, -0.0916, -0.0392]])), ('bias', tensor([-0.0467]))])


In [15]:
# Each parameter is an instance of the Parameter class, not a bare tensor.
print(type(net[2].bias))

<class 'torch.nn.parameter.Parameter'>


In [16]:
# .data exposes the parameter's underlying value as a plain tensor.
print(net[2].bias.data)

tensor([-0.0467])


In [17]:
# .grad holds the accumulated gradient; it prints None here, presumably
# because no backward pass has been run on this network yet.
print(net[2].bias.grad)

None


In [21]:
## Access all parameters of a layer at once.
## A list comprehension over named_parameters() builds a list of
## (name, shape) tuples, which is printed as a single list.
print([(name,param.shape) for name,param in net[0].named_parameters()])

[('weight', torch.Size([8, 4])), ('bias', torch.Size([8]))]


In [23]:
# named_parameters() returns a generator, so it is consumed lazily and can
# only be iterated once per call.
print(type(net[0].named_parameters()))

<class 'generator'>


In [24]:
## Collecting parameters from nested blocks
def block1():
    """Return a new 4 -> 8 -> 4 block with a ReLU after each Linear layer."""
    layers=[nn.Linear(4,8),nn.ReLU(),nn.Linear(8,4),nn.ReLU()]
    return nn.Sequential(*layers)

def block2():
    """Return a Sequential of four independent block1() instances.

    The children are registered under the names 'block0' .. 'block3'.
    """
    stacked=nn.Sequential()
    for child_name in [f'block{i}' for i in range(4)]:
        # Each call to block1() creates fresh, unshared layers.
        stacked.add_module(child_name,block1())
    return stacked

# Nested network: the stacked blocks followed by a final 4 -> 1 Linear layer.
rgnet=nn.Sequential(block2(),nn.Linear(4,1))
print(rgnet)

Sequential(
  (0): Sequential(
    (block0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [25]:
# Nested containers are indexed like nested lists: first child of rgnet ->
# its second sub-block ('block1') -> that block's first Linear layer -> bias.
rgnet[0][1][0].bias

Parameter containing:
tensor([-0.2058,  0.4351,  0.3303,  0.0841,  0.1282, -0.2985,  0.3880, -0.0553],
       requires_grad=True)

In [27]:
def init_normal(m):
    """Initialise a Linear layer: weights ~ N(0, 0.01^2), bias = 0.

    Modules of any other type are left untouched. Meant to be passed to
    ``Module.apply``, which calls it on every submodule.
    """
    if type(m) is not nn.Linear:
        return
    # A trailing underscore marks an initializer that modifies its argument in place.
    nn.init.normal_(m.weight,mean=0,std=0.01)
    nn.init.zeros_(m.bias)
net.apply(init_normal)

Sequential(
  (0): Linear(in_features=4, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
)

In [28]:
# Inspect the first row of the first layer's weight matrix after initialization.
net[0].weight.data[0]

tensor([0.0023, 0.0084, 0.0255, 0.0065])

In [32]:
def init_normal(m):
    """Fill the weight of a Linear layer with the constant 1.

    NOTE(review): despite its name this performs *constant* initialization
    and, once this cell runs, shadows the earlier normal-distribution
    ``init_normal``. The bias is left unchanged. Consider renaming it
    (e.g. ``init_constant``) if nothing else relies on the current name.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.constant_(m.weight,val=1)
net.apply(init_normal)
# Show the first weight row to confirm every entry is now 1.
net[0].weight.data[0]

tensor([1., 1., 1., 1.])

## 5.3 自定义层

### 5.3.1 自定义不带参数的层

In [33]:
class CenteredLayer(nn.Module):
    """Parameter-free layer that subtracts the mean from its input."""

    def __init__(self):
        super().__init__()

    def forward(self,X):
        """Return ``X`` shifted so that the mean over all its elements is zero."""
        overall_mean=X.mean()
        return X-overall_mean

In [34]:
# Sanity check: the mean of [1, 2, 3, 4, 5] is 3, so the output is centred on zero.
layer=CenteredLayer()
layer(torch.FloatTensor([1,2,3,4,5]))

tensor([-2., -1.,  0.,  1.,  2.])

### 5.3.2 自定义带参数的层

In [35]:
class MyLinear(nn.Module):
    """Fully connected layer with a built-in ReLU, written from scratch.

    Args:
        in_units: number of input features.
        units: number of output features.

    Attributes:
        weight: ``nn.Parameter`` of shape (in_units, units), standard-normal init.
        bias: ``nn.Parameter`` of shape (units,), standard-normal init.
    """
    def __init__(self,in_units,units):
        super().__init__()
        self.weight=nn.Parameter(torch.randn(in_units,units))
        self.bias=nn.Parameter(torch.randn(units,))

    def forward(self,X):
        """Return ``relu(X @ weight + bias)``."""
        # Use the Parameters themselves, not `.data`: `.data` yields tensors
        # detached from the autograd graph, so no gradient would ever reach
        # weight/bias and the layer could not be trained.
        linear=torch.matmul(X,self.weight)+self.bias
        return F.relu(linear)

## 5.4 读写文件

In [36]:
# Save a single tensor ([0, 1, 2, 3]) to the file 'x-file', resolved relative
# to the current working directory.
x=torch.arange(4)
torch.save(x,'x-file')

In [37]:
# Read the tensor back from disk into memory.
# NOTE(review): torch.load relies on pickle; only load files from trusted sources.
x2=torch.load('x-file')