In [4]:
import torch
from torch import nn
from torch.nn import functional as F

# 层和块

In [5]:
# A two-layer network assembled with nn.Sequential:
# 20 input features -> 256 hidden units -> ReLU -> 10 outputs.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
# Random minibatch: 2 rows, 20 features each.
X = torch.rand(2, 20)
net(X)

tensor([[-0.0559,  0.2158,  0.1552, -0.1478,  0.2858,  0.0304,  0.2131, -0.2499,
          0.1447, -0.3082],
        [-0.1454,  0.2696,  0.1304, -0.1534,  0.3438, -0.1335,  0.2757, -0.0790,
          0.1838, -0.2413]], grad_fn=<AddmmBackward>)

## 多层感知机


In [6]:
class MLP(nn.Module):
    """Multilayer perceptron: 20 inputs -> 256 hidden units (ReLU) -> 10 outputs."""

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Run ``X`` through the hidden layer, ReLU, then the output layer."""
        # ReLU is an activation with no parameters, so it does not need to be
        # stored as a layer of its own; the functional form F.relu is enough.
        # (nn.ReLU is a class, whereas F.relu is a plain function.)
        hidden_activations = F.relu(self.hidden(X))
        return self.out(hidden_activations)

In [7]:
net = MLP()
# net(X) goes through the module's __call__ method, i.e. the module instance is a callable object.
net(X)

tensor([[ 0.2382, -0.0882,  0.0009,  0.1618,  0.0275,  0.0590,  0.0688,  0.1739,
         -0.2814,  0.0353],
        [ 0.2753, -0.2081,  0.0754,  0.0461,  0.0655,  0.1305,  0.0863,  0.2872,
         -0.2564, -0.1132]], grad_fn=<AddmmBackward>)

## nn.Sequential是如何实现的

## 顺序块

In [38]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    Every positional argument is registered as a child module under its
    positional index (``'0'``, ``'1'``, ...). ``forward`` applies the
    children one after another in registration order.

    The ``print`` calls are kept on purpose: they show the (initially empty)
    child registry and the order in which blocks are registered and executed.
    """

    def __init__(self, *args):
        super().__init__()
        # The child registry is empty right after nn.Module.__init__.
        print(self._modules)
        for idx, block in enumerate(args):
            print(block)
            # Register under a *string* name. Using the module object itself
            # as the key breaks every nn.Module API that builds dotted names
            # from the keys (state_dict(), named_parameters(), repr(), ...).
            self.add_module(str(idx), block)

    def forward(self, X):
        """Feed ``X`` through each registered block in order and return the result."""
        for block in self._modules.values():
            print(block)
            X = block(X)
        return X
    
# Build the custom container from three layers (Linear -> ReLU -> Linear) and run it on X.
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)

OrderedDict()
Linear(in_features=20, out_features=256, bias=True)
ReLU()
Linear(in_features=256, out_features=10, bias=True)
Linear(in_features=20, out_features=256, bias=True)
ReLU()
Linear(in_features=256, out_features=10, bias=True)


tensor([[ 0.0900, -0.2613, -0.1118, -0.0734,  0.1978,  0.2938,  0.0646, -0.0612,
          0.0968, -0.0365],
        [ 0.0864, -0.3624, -0.1294, -0.1335,  0.2767,  0.2676,  0.0715, -0.2268,
          0.2318,  0.0090]], grad_fn=<AddmmBackward>)

## 在正向传播函数中执行代码

In [9]:
class FixedHiddenMLP(nn.Module):
    """Network whose forward pass mixes a trainable layer, a constant weight
    matrix and ordinary Python control flow.

    ``forward`` expects a 2-D input with 20 columns (it is passed to
    ``nn.Linear(20, 20)`` and ``torch.mm``) and returns a scalar tensor.
    """

    def __init__(self):
        super().__init__()
        # Constant random weights: not an nn.Parameter, so no optimizer will
        # ever update them. Registered as a buffer so they follow the module
        # through .to()/.cuda()/.double(); a plain tensor attribute would be
        # left behind and torch.mm would fail on a device/dtype mismatch.
        # persistent=False keeps state_dict() keys exactly as before.
        self.register_buffer(
            'rand_weight',
            torch.rand((20, 20), requires_grad=False),
            persistent=False,
        )
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return the sum of the rescaled activations as a 0-dim tensor."""
        X = self.linear(X)
        # Fixed (non-trainable) affine map followed by ReLU.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # The same linear layer is applied a second time, so both
        # applications share one set of parameters.
        X = self.linear(X)
        # Data-dependent control flow: halve until the L1 norm is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

In [11]:
# Instantiate FixedHiddenMLP and evaluate it on X; its forward returns X.sum().
net = FixedHiddenMLP()
net(X)

tensor(-0.0089, grad_fn=<SumBackward0>)

## 练习1

In [37]:
class MySequential2(nn.Module):
    """Sequential container that keeps its blocks in ``self.seqlist``.

    The blocks are held in an ``nn.ModuleList`` rather than a plain Python
    list. A plain list hides the blocks from ``nn.Module``: the forward pass
    still runs, but ``parameters()`` and ``state_dict()`` come back empty and
    ``.to(device)`` does not move the blocks, so the network can neither be
    trained by an optimizer nor saved.
    """

    def __init__(self, *args):
        super().__init__()
        # nn.ModuleList registers every block as a child module while still
        # supporting iteration, indexing, len() and append() like a list.
        self.seqlist = nn.ModuleList(args)

    def forward(self, X):
        """Apply the stored blocks to ``X`` in order and return the result."""
        for block in self.seqlist:
            X = block(X)
        return X
# Chain two linear layers through MySequential2 (no activation in between) and run it on X.
net = MySequential2(nn.Linear(20,256), nn.Linear(256, 10))
net(X)

tensor([[-0.0215,  0.1072,  0.3155,  0.1401, -0.1343, -0.2321,  0.0727,  0.2232,
         -0.2747, -0.1684],
        [ 0.1457, -0.2081,  0.3325,  0.2457, -0.0579, -0.1076,  0.2100,  0.5497,
         -0.2181, -0.1995]], grad_fn=<AddmmBackward>)

## 练习2

In [43]:
class NET1(nn.Module):
    """First sub-network: Linear(20, 128) followed by Linear(128, 64).

    NOTE(review): there is no activation between the two linear layers, so
    the pair composes to a single affine map -- confirm this is intended.
    """

    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(20, 128)
        self.linear2 = nn.Linear(128, 64)

    def forward(self, X):
        """Apply ``linear1`` and then ``linear2`` to ``X``."""
        projected = self.linear1(X)
        return self.linear2(projected)

class NET2(nn.Module):
    """Second sub-network: Linear(64, 32) -> ReLU -> Linear(32, 10)."""

    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(64, 32)
        self.linear2 = nn.Linear(32, 10)

    def forward(self, X):
        """Apply ``linear1``, a ReLU, and then ``linear2`` to ``X``."""
        activated = F.relu(self.linear1(X))
        return self.linear2(activated)

class NET3(nn.Module):
    """Chain two sub-networks: the output of ``net1`` is fed into ``net2``."""

    def __init__(self, net1, net2):
        super().__init__()
        self.net1 = net1
        self.net2 = net2

    def forward(self, X):
        """Return ``net2(net1(X))``."""
        intermediate = self.net1(X)
        return self.net2(intermediate)
    
net1 = NET1()
net2 = NET2()
# NET3 wraps the two sub-networks; its forward computes net2(net1(X)).
net3 = NET3(net1, net2)
net3(X)

tensor([[ 0.1946, -0.0354, -0.0998,  0.0151, -0.3166, -0.0090,  0.0552, -0.2227,
          0.0759, -0.1213],
        [ 0.1356, -0.0145, -0.1149,  0.0410, -0.3505,  0.0749,  0.0423, -0.1783,
          0.1479, -0.1061]], grad_fn=<AddmmBackward>)

1