In [1]:
#Neural network basics: the Module class
#Review: a multilayer perceptron (MLP)
import torch
from torch import nn
from torch.nn import functional as F
# Chain ready-made layers in a Sequential container: 20 inputs -> 256 hidden units -> 10 outputs.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
# Random mini-batch of shape 2 x 20.
X = torch.rand((2, 20))
print(net(X))

tensor([[-0.0608, -0.0539, -0.1801,  0.1617, -0.0128,  0.1229, -0.0498, -0.0169,
         -0.1399,  0.0076],
        [-0.1066,  0.0853, -0.1614,  0.1234, -0.0211, -0.0021,  0.0097,  0.0173,
          0.0652, -0.1050]], grad_fn=<AddmmBackward>)


In [3]:
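#The nn.Sequential call above effectively defines a Module for us.
#Essentially every layer and model inherits from the Module class
#and overrides its __init__ and forward methods: __init__ creates the model parameters, and forward defines the forward computation (i.e. forward propagation).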
class MLP(nn.Module):
    """Multilayer perceptron with a single hidden layer (20 -> 256 -> 10).

    ``__init__`` creates the layers, and with them the model parameters;
    ``forward`` defines how an input ``x`` is turned into the model output.
    """

    def __init__(self, **kwargs):
        # Initialise nn.Module first so the layers assigned below get registered.
        super().__init__(**kwargs)
        self.hidden = nn.Linear(20, 256)  # hidden layer
        self.act = nn.ReLU()              # activation
        self.output = nn.Linear(256, 10)  # output layer

    def forward(self, x):
        """Forward computation: hidden layer, ReLU, then the output layer."""
        return self.output(self.act(self.hidden(x)))
# Instantiate the MLP as net and print its output for the batch X.
net = MLP()
print(net(X))

tensor([[-0.3276, -0.0123, -0.0433,  0.2119,  0.0616, -0.2372, -0.2080,  0.2814,
         -0.0863,  0.0044],
        [-0.1913, -0.0492,  0.0495,  0.3051, -0.0784, -0.2034, -0.1656,  0.3147,
         -0.2544,  0.0923]], grad_fn=<AddmmBackward>)


In [5]:
# Module subclasses.
# Sequential: for models whose forward computation simply chains the layers one after another.
# It accepts either an OrderedDict of submodules or a series of submodules as arguments
# and adds the Module instances one by one.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
# Random mini-batch of shape 2 x 20.
X = torch.rand((2, 20))
print(net(X))
print(net)

tensor([[-0.0213, -0.1035, -0.0988,  0.1665,  0.2055, -0.1810, -0.0367,  0.2841,
         -0.1657,  0.1454],
        [-0.0512, -0.1794, -0.0872,  0.1857,  0.1900, -0.1909, -0.0309,  0.2810,
         -0.1732,  0.1011]], grad_fn=<AddmmBackward>)
Sequential(
  (0): Linear(in_features=20, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)


In [7]:
# ModuleList holds submodules in a list and supports list-style append and extend.
net = nn.ModuleList()
net.extend([nn.Linear(20, 256), nn.ReLU()])
net.append(nn.Linear(256, 10))
# Indexing works like on a Python list.
print(net[-1])
print(net)
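#A ModuleList is only a container that stores modules: it neither connects them nor defines the order in which they are executed
#(so adjacent layers do not need matching input/output sizes), and it implements no forward; you have to write forward yourself.
#ModuleList exists to make defining the forward pass more flexible: it acts as an iterable.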
class MyModule(nn.Module):
    """Ten Linear(10, 10) layers stored in an nn.ModuleList.

    The ModuleList only stores the layers; how they are combined is
    written out explicitly in ``forward``.
    """

    def __init__(self):
        super().__init__()
        layers = nn.ModuleList()
        for _ in range(10):
            layers.append(nn.Linear(10, 10))
        self.linears = layers

    def forward(self, x):
        # At step k, add the output of layer k // 2 to the output of layer k,
        # both evaluated on the current x. Layers are fetched by integer index.
        for k in range(len(self.linears)):
            x = self.linears[k // 2](x) + self.linears[k](x)
        return x

Linear(in_features=256, out_features=10, bias=True)
ModuleList(
  (0): Linear(in_features=20, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)


In [12]:
#加入到ModuleList里面的所有模块的参数会被自动添加到整个网络中
class Module_ModuleList(nn.Module):
    """Keeps one Linear(10, 10) inside an nn.ModuleList, so its parameters are registered on this module."""

    def __init__(self):
        super().__init__()
        linear = nn.Linear(10, 10)
        self.Linears = nn.ModuleList([linear])
class Module_List(nn.Module):
    """Counter-example: the Linear(10, 10) is kept in a plain Python list.

    A plain list is not a Module, so the layer is not registered and
    ``parameters()`` of this class yields nothing.
    """

    def __init__(self):
        super().__init__()
        linear = nn.Linear(10, 10)
        self.Linears = [linear]
# Compare which parameters each variant exposes through parameters().
net1 = Module_ModuleList()
net2 = Module_List()
for label, model in (("net1:", net1), ("net2:", net2)):
    print(label)
    for param in model.parameters():
        print(param.size())


net1:
torch.Size([10, 10])
torch.Size([10])
net2:


In [13]:
# ModuleDict: a container that takes a dictionary of name -> submodule as input.
net = nn.ModuleDict(dict(linear=nn.Linear(784, 256), act=nn.ReLU()))
# Entries can be added and looked up as in a dict ...
net['out'] = nn.Linear(256, 10)
print(net['linear'])
# ... and are also reachable as attributes.
print(net.out)
print(net)
# Data cannot be passed to net directly: ModuleDict has no forward, so calling it raises NotImplementedError.

Linear(in_features=784, out_features=256, bias=True)
Linear(in_features=256, out_features=10, bias=True)
ModuleDict(
  (linear): Linear(in_features=784, out_features=256, bias=True)
  (act): ReLU()
  (out): Linear(in_features=256, out_features=10, bias=True)
)


In [17]:
#复杂类构造——FancyMLP
class FancyMLP(nn.Module):
    """MLP that demonstrates a constant weight, layer reuse and Python control flow.

    ``forward`` expects a 2-D input with 20 columns (it is fed to
    ``nn.Linear(20, 20)`` and ``torch.mm``) and returns a scalar tensor:
    the sum of all entries of the final activations.
    """

    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)
        # Constant, non-trainable random weight. It is registered as a buffer
        # rather than stored as a bare tensor attribute: it stays out of
        # parameters(), but .to()/.cuda()/.double() now move or cast it together
        # with the rest of the module, and state_dict() includes it so a saved
        # model can be restored with the same constants.
        self.register_buffer('rand_weight', torch.rand((20, 20), requires_grad=False))
        self.linear = nn.Linear(20, 20)

    def forward(self, x):
        x = self.linear(x)
        # nn.ReLU is a layer object that has to be instantiated first, whereas
        # nn.functional.relu is a plain function call, which is more concise here.
        # relu(v) = v for v > 0, and 0 otherwise.
        x = nn.functional.relu(torch.mm(x, self.rand_weight) + 1)
        # Reuse the fully connected layer: both calls share the same parameters.
        x = self.linear(x)
        # Control flow on tensor values: norm() computes the 2-norm by default and
        # item() extracts the value of a one-element tensor as a Python scalar.
        while x.norm().item() > 1:
            x /= 20
        if x.norm().item() < 0.8:
            x *= 10
        return x.sum()
# Feed a random 2 x 20 batch through a fresh FancyMLP; print the module, then its output.
X = torch.rand(2,20)
net = FancyMLP()
print(net)
print(net(X))
#FancyMLP and Sequential are both subclasses of Module, so they can be nested inside one another
class NestMLP(nn.Module):
    """Module that wraps a small Sequential: Linear(40, 30) followed by ReLU."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.net = nn.Sequential(
            nn.Linear(40, 30),
            nn.ReLU(),
        )

    def forward(self, x):
        # The forward computation is delegated entirely to the wrapped Sequential.
        return self.net(x)
# Nest NestMLP, a Linear(30,20) layer and FancyMLP inside one Sequential,
# then print the composed module and its output for a random 2 x 40 batch.
X = torch.rand(2,40)
net = nn.Sequential(NestMLP(),nn.Linear(30,20),FancyMLP())
print(net)
print(net(X))
        

FancyMLP(
  (linear): Linear(in_features=20, out_features=20, bias=True)
)
tensor(-1.8370, grad_fn=<SumBackward0>)
Sequential(
  (0): NestMLP(
    (net): Sequential(
      (0): Linear(in_features=40, out_features=30, bias=True)
      (1): ReLU()
    )
  )
  (1): Linear(in_features=30, out_features=20, bias=True)
  (2): FancyMLP(
    (linear): Linear(in_features=20, out_features=20, bias=True)
  )
)
tensor(2.1962, grad_fn=<SumBackward0>)
