<a href="https://colab.research.google.com/github/veager/StudyNotes/blob/new/Codes/PyTorch-Tutorial/PyTorch-torch.nn.Module%E7%B1%BB-%E6%9E%84%E5%BB%BA%E6%A8%A1%E5%9E%8B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

PyTorch torch.nn.Module 类 构建模型

参考资料：

- GitHub：Codes/PyTorch-Tutorial/PyTorch-torch.nn.Module类-构建模型.ipynb

- 博客：PyTorch torch.nn.Module 类 构建模型，[地址](https://www.cnblogs.com/veager/articles/16305187.html)

# 0 加载数据

In [None]:
import time
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F

## 0.1 加载 Diabetes 数据集

In [None]:
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import MinMaxScaler

# Load the scikit-learn diabetes dataset and pull out features and target.
data = load_diabetes()
X, Y = data.data, data.target

# Min-max scale inputs and target into [0, 1]. The target is reshaped into a
# single column before it is handed to its scaler.
scaler_X = MinMaxScaler().fit(X)
Xs = scaler_X.transform(X)

scaler_Y = MinMaxScaler().fit(Y.reshape(-1, 1))
Ys = scaler_Y.transform(Y.reshape(-1, 1))

# Quick look at the scaled data: shapes, first rows, summary statistics.
print(Xs.shape, Ys.shape)
print(Xs[:5], Ys[:5])
print(pd.DataFrame(Xs).describe(), pd.DataFrame(Ys).describe())

# Convert the scaled arrays to float32 tensors.
Xs_tensor = torch.tensor(Xs, dtype=torch.float32)
Ys_tensor = torch.tensor(Ys, dtype=torch.float32)

(442, 10) (442, 1)
[[0.66666667 1.         0.58264463 0.54929577 0.29411765 0.25697211
  0.20779221 0.28208745 0.56221737 0.43939394]
 [0.48333333 0.         0.14876033 0.35211268 0.42156863 0.30677291
  0.62337662 0.14104372 0.22244301 0.16666667]
 [0.88333333 1.         0.51652893 0.43661972 0.28921569 0.25896414
  0.24675325 0.28208745 0.49658437 0.40909091]
 [0.08333333 0.         0.30165289 0.30985915 0.49509804 0.44721116
  0.23376623 0.42313117 0.57293604 0.46969697]
 [0.51666667 0.         0.20661157 0.54929577 0.46568627 0.41733068
  0.38961039 0.28208745 0.36236911 0.33333333]] [[0.39252336]
 [0.15576324]
 [0.36137072]
 [0.56386293]
 [0.34267913]]
                0           1           2           3           4           5  \
count  442.000000  442.000000  442.000000  442.000000  442.000000  442.000000   
mean     0.491968    0.468326    0.346107    0.459818    0.451668    0.367725   
std      0.218484    0.499561    0.182567    0.194806    0.169647    0.151460   
min      0

## 0.2 全局参数设置

In [None]:
# Dataset sizes: 70% training, 20% validation, the remainder for testing.
N_SAMPLE = Xs_tensor.shape[0]               # total number of samples
N_TRAIN = int(N_SAMPLE * 0.7)               # training samples
N_VALID = int(N_SAMPLE * 0.2)               # validation samples
N_TEST = N_SAMPLE - (N_TRAIN + N_VALID)     # test samples


# Training hyper-parameters
BATCH_SIZE = 64
N_EPOCH = 1000
LEARNING_RATE = 0.05


# Network dimensions; input and output widths are read from the data tensors.
HIDDEN_DIM = 4
INPUT_DIM = Xs_tensor.shape[1]
OUTPUT_DIM = Ys_tensor.shape[1]
print("NN Structure:", INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM)


# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda:0')
else:
    DEVICE = torch.device('cpu')
print("device:", DEVICE)

NN Structure: 10 4 1
device: cuda:0


## 0.3 创建结构化数据

In [None]:
from torch.utils.data import TensorDataset, Dataset, random_split, DataLoader
from torch.utils.data.dataloader import default_collate

# Wrap the feature and target tensors in a Dataset so each sample stays paired.
ds = TensorDataset(Xs_tensor, Ys_tensor)

# Randomly split into training, validation and test subsets.
ds_train, ds_valid, ds_test = random_split(ds, lengths=[N_TRAIN, N_VALID, N_TEST])
ds_vaild = ds_valid  # backward-compatible alias for the original misspelled name
print(len(ds_train), len(ds_valid), len(ds_test))


def collate_to_device(samples):
    """Collate a list of samples with ``default_collate`` and move every
    resulting tensor to ``DEVICE``.

    Returns a tuple with one batched tensor per dataset field.
    """
    return tuple(batch.to(DEVICE) for batch in default_collate(samples))


# One DataLoader per subset; only the training loader shuffles its samples.
dl_train = DataLoader(ds_train, batch_size=BATCH_SIZE, shuffle=True,
                      collate_fn=collate_to_device)

dl_valid = DataLoader(ds_valid, batch_size=BATCH_SIZE,   # default shuffle = False
                      collate_fn=collate_to_device)

dl_test = DataLoader(ds_test, batch_size=BATCH_SIZE,     # default shuffle = False
                     collate_fn=collate_to_device)

309 88 45


## 0.4 定义模型

In [None]:
# Variant 2: every operation is an nn.Module layer object.
class BPNNModeler2(nn.Module):
    """Single-hidden-layer network assembled entirely from ``nn.Module`` layers.

    The forward pass applies Linear -> Sigmoid -> Linear -> Sigmoid and then
    flattens the result across all dimensions, so the output is 1-D.

    Args:
        input_dim: number of input features.
        hidden_dim: number of hidden units.
        output_dim: number of output units.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()

        # Hidden layer
        self.layer1_linear = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.layer1_sigmoid = nn.Sigmoid()
        # Output layer
        self.layer2_linear = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.layer2_sigmoid = nn.Sigmoid()
        # Collapses every dimension (the batch dimension included) into one
        self.layer2_flattern = nn.Flatten(start_dim=0, end_dim=-1)

    def forward(self, x):
        """Run the forward pass and return the flattened network output."""
        hidden = self.layer1_sigmoid(self.layer1_linear(x))
        activated = self.layer2_sigmoid(self.layer2_linear(hidden))
        return self.layer2_flattern(activated)
# ==============================================================================

### 0.4.1 初始化模型

In [None]:
# Instantiate the network and move it to the selected device.
model = BPNNModeler2(INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM).to(DEVICE)

## 0.5 定义损失函数

In [None]:
# Loss function: mean squared error.
def loss_func(model_out, target, reduction='mean'):
    """Return the MSE loss between ``model_out`` and ``target``.

    ``reduction`` is passed through unchanged to ``F.mse_loss``.
    """
    return F.mse_loss(model_out, target, reduction=reduction)

# 1 基本框架

In [None]:
class MyModel(torch.nn.Module):
    """Skeleton showing the usual layout of a custom ``nn.Module``.

    ``forward`` and ``loss_func`` are placeholders to be filled in by a
    concrete model; ``pred_label`` is a working helper for classifiers.

    Args:
        params: configuration values for the model (unused by the skeleton).
    """

    def __init__(self, params):
        super(MyModel, self).__init__()
        # Register the learnable components here, typically nn layer modules
        # or nn.Parameter objects.

    def forward(self, x):
        """Forward pass (placeholder).

        Args:
            x: input batch; its first dimension (``x.size()[0]``) is the
                batch size.

        Raises:
            NotImplementedError: always, until a concrete model overrides it.
        """
        # Wire the registered components together here and return the output.
        raise NotImplementedError("forward() must be implemented by the concrete model")

    def loss_func(self, out, target):
        """Loss computation (placeholder).

        Args:
            out: model output, usually the predictions.
            target: ground-truth values matching ``out``.

        Raises:
            NotImplementedError: always, until a concrete model overrides it.
        """
        raise NotImplementedError("loss_func() must be implemented by the concrete model")

    def pred_label(self, prob, dim=-1):
        """Return the index of the largest entry of ``prob`` along ``dim``.

        Intended for classifiers whose output holds per-class scores or
        probabilities.

        Args:
            prob: tensor of per-class scores.
            dim: dimension to reduce over; defaults to the last one.

        Returns:
            Tensor of indices with ``dim`` removed (``keepdim=False``).
        """
        return torch.argmax(prob, dim=dim, keepdim=False)

# 2 方法

## 2.1 `torch.nn.Module()` 类主要方法

## 2.2 `.zero_grad()` 方法使用

**实例 1**： 使用 `optimizer.zero_grad()`

In [None]:
N_EPOCH = 50
LEARNING_RATE = 0.1

# Build the optimizer over the parameters of the already-defined `model`.
optimizer = torch.optim.SGD(params=model.parameters(), lr=LEARNING_RATE)

for epoch in range(N_EPOCH):

    # Sum of the per-batch losses seen during this epoch
    train_total_loss = 0.

    # dl_train is the DataLoader defined earlier
    for batch_idx, (X_batch, Y_batch) in enumerate(dl_train):
        # Clear the gradients of all parameters held by the optimizer
        optimizer.zero_grad()

        out = model(X_batch)
        loss = loss_func(out, torch.flatten(Y_batch))

        # Backpropagate, then apply one optimizer update
        loss.backward()
        optimizer.step()

        train_total_loss += loss.item()

    # Report training progress
    if (epoch + 1) % 1 != 0:
        continue
    print('Epoch: {0:>4}, Train Loss: {1:>10.5f}'.format(epoch+1, train_total_loss))

Epoch:    1, Train Loss:    0.37416
Epoch:    2, Train Loss:    0.35511
Epoch:    3, Train Loss:    0.34071
Epoch:    4, Train Loss:    0.32747
Epoch:    5, Train Loss:    0.32199
Epoch:    6, Train Loss:    0.31627
Epoch:    7, Train Loss:    0.31178
Epoch:    8, Train Loss:    0.30900
Epoch:    9, Train Loss:    0.30481
Epoch:   10, Train Loss:    0.30476
Epoch:   11, Train Loss:    0.30285
Epoch:   12, Train Loss:    0.30226
Epoch:   13, Train Loss:    0.30088
Epoch:   14, Train Loss:    0.29920
Epoch:   15, Train Loss:    0.30008
Epoch:   16, Train Loss:    0.29815
Epoch:   17, Train Loss:    0.29772
Epoch:   18, Train Loss:    0.29866
Epoch:   19, Train Loss:    0.29792
Epoch:   20, Train Loss:    0.29794
Epoch:   21, Train Loss:    0.29616
Epoch:   22, Train Loss:    0.29995
Epoch:   23, Train Loss:    0.29881
Epoch:   24, Train Loss:    0.29792
Epoch:   25, Train Loss:    0.29626
Epoch:   26, Train Loss:    0.29506
Epoch:   27, Train Loss:    0.29864
Epoch:   28, Train Loss:    

**实例 2**： 使用 `model.zero_grad()`

In [None]:
N_EPOCH = 50
LEARNING_RATE = 0.5

for epoch in range(N_EPOCH):

    # Sum of the per-batch losses seen during this epoch
    train_total_loss = 0.

    # dl_train is the DataLoader defined earlier
    for batch_idx, (X_batch, Y_batch) in enumerate(dl_train):
        # Clear the gradients directly on the already-defined model
        model.zero_grad()

        out = model(X_batch)
        loss = loss_func(out, torch.flatten(Y_batch))
        loss.backward()         # backpropagate to populate .grad

        # Manual gradient-descent step; gradient tracking is disabled while
        # the parameters are modified in place.
        with torch.no_grad():
            for param in model.parameters():
                param.sub_(LEARNING_RATE * param.grad)

        train_total_loss += loss.item()

    # Report training progress
    if (epoch + 1) % 1 != 0:
        continue
    print('Epoch: {0:>4}, Train Loss: {1:>10.5f}'.format(epoch+1, train_total_loss))

Epoch:    1, Train Loss:    0.30232
Epoch:    2, Train Loss:    0.29653
Epoch:    3, Train Loss:    0.29755
Epoch:    4, Train Loss:    0.29814
Epoch:    5, Train Loss:    0.29646
Epoch:    6, Train Loss:    0.29829
Epoch:    7, Train Loss:    0.29925
Epoch:    8, Train Loss:    0.29641
Epoch:    9, Train Loss:    0.29618
Epoch:   10, Train Loss:    0.29272
Epoch:   11, Train Loss:    0.29529
Epoch:   12, Train Loss:    0.29557
Epoch:   13, Train Loss:    0.29404
Epoch:   14, Train Loss:    0.29642
Epoch:   15, Train Loss:    0.29439
Epoch:   16, Train Loss:    0.29238
Epoch:   17, Train Loss:    0.29295
Epoch:   18, Train Loss:    0.29500
Epoch:   19, Train Loss:    0.29573
Epoch:   20, Train Loss:    0.29591
Epoch:   21, Train Loss:    0.29823
Epoch:   22, Train Loss:    0.29290
Epoch:   23, Train Loss:    0.29327
Epoch:   24, Train Loss:    0.29579
Epoch:   25, Train Loss:    0.29401
Epoch:   26, Train Loss:    0.29404
Epoch:   27, Train Loss:    0.29444
Epoch:   28, Train Loss:    

## 2.3 训练模式和评估模式

In [None]:
N_EPOCH = 50
LEARNING_RATE = 0.1

# Plain SGD over the model parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=LEARNING_RATE)

for epoch in range(N_EPOCH):
    # ---- Training pass ----
    model.train()    # switch the model to training mode
    train_total_loss = 0.

    for batch_idx, (X_batch, Y_batch) in enumerate(dl_train):
        optimizer.zero_grad()

        out = model(X_batch)
        loss = loss_func(out, torch.flatten(Y_batch))

        loss.backward()
        optimizer.step()
        train_total_loss += loss.item()


    # ---- Validation, approach 1: wrap the pass in torch.no_grad() ----
    model.eval()    # switch the model to evaluation mode
    vaild_total_loss = 0.

    with torch.no_grad():
        for X_valid, Y_valid in dl_valid:
            out = model(X_valid)
            loss = loss_func(out, torch.flatten(Y_valid))
            vaild_total_loss += loss.item()


    # ---- Validation, approach 2: detach the model output ----
    # This recomputes the same quantity and overwrites the value from approach 1.
    model.eval()    # switch the model to evaluation mode
    vaild_total_loss = 0.

    for X_valid, Y_valid in dl_valid:
        # Per the author's note, X_valid.requires_grad is False while the
        # model output has requires_grad True, so only the output is detached
        # from the graph (detaching X_valid / Y_valid is not needed).
        out = model(X_valid).detach()

        loss = loss_func(out, torch.flatten(Y_valid))
        vaild_total_loss += loss.item()

    # Report the epoch number with the training and validation totals
    if (epoch+1) % 1 != 0:
        continue
    print(epoch + 1, ':',
          round(train_total_loss, 5),
          round(vaild_total_loss, 5))

1 : 0.28574 0.10303
2 : 0.28534 0.10298
3 : 0.28559 0.10295
4 : 0.28472 0.10293
5 : 0.28485 0.10289
6 : 0.28374 0.10286
7 : 0.28326 0.10284
8 : 0.28441 0.10282
9 : 0.28756 0.10278
10 : 0.28675 0.10274
11 : 0.28629 0.10272
12 : 0.28579 0.10269
13 : 0.28444 0.10268
14 : 0.28246 0.10264
15 : 0.28409 0.10261
16 : 0.284 0.1026
17 : 0.28582 0.10256
18 : 0.28458 0.10252
19 : 0.28587 0.10246
20 : 0.28354 0.10242
21 : 0.28332 0.10239
22 : 0.2855 0.10236
23 : 0.28331 0.10232
24 : 0.28471 0.10227
25 : 0.28137 0.10224
26 : 0.28565 0.10221
27 : 0.285 0.10217
28 : 0.28222 0.10214
29 : 0.28187 0.10213
30 : 0.28197 0.10211
31 : 0.28449 0.10208
32 : 0.28121 0.10204
33 : 0.28208 0.102
34 : 0.28421 0.10197
35 : 0.28069 0.10196
36 : 0.2813 0.10193
37 : 0.28155 0.10192
38 : 0.28195 0.10187
39 : 0.28237 0.10181
40 : 0.28391 0.10175
41 : 0.28116 0.10173
42 : 0.28197 0.1017
43 : 0.28228 0.10165
44 : 0.28003 0.10161
45 : 0.28203 0.10158
46 : 0.28242 0.10154
47 : 0.28016 0.10151
48 : 0.28093 0.10148
49 : 0.2816

## 2.4 GPU 部署相关

## 2.5 获取模型参数和模型结构

### 2.5.1 获取参数

#### A 遍历所有参数

In [None]:
layer_linear = nn.Linear(in_features=3, out_features=5)

# .parameters() yields the Parameter objects only.
print("---------.parameters() 方法-------------------")
for param in layer_linear.parameters():
    print(type(param), param.data.shape, sep="\n")

# .named_parameters() yields (name, Parameter) pairs.
print("---------.named_parameters() 方法-------------")
for name, param in layer_linear.named_parameters():
    print(type(param), f"{name} {param.data.shape}", sep="\n")

# .state_dict() maps each name to a tensor.
print("---------.state_dict() 方法-------------------")
for name, tensor in layer_linear.state_dict().items():
    print(type(tensor), f"{name} {tensor.shape}", sep="\n")

# Iterating the state dict itself yields only the names.
print("---------.state_dict() 方法，遍历参数名--------")
for name in layer_linear.state_dict().keys():
    print(name)

---------.parameters() 方法-------------------
<class 'torch.nn.parameter.Parameter'>
torch.Size([5, 3])
<class 'torch.nn.parameter.Parameter'>
torch.Size([5])
---------.named_parameters() 方法-------------
<class 'torch.nn.parameter.Parameter'>
weight torch.Size([5, 3])
<class 'torch.nn.parameter.Parameter'>
bias torch.Size([5])
---------.state_dict() 方法-------------------
<class 'torch.Tensor'>
weight torch.Size([5, 3])
<class 'torch.Tensor'>
bias torch.Size([5])
---------.state_dict() 方法，遍历参数名--------
weight
bias


遍历无**学习参数**的 Module，输出为空

In [None]:
layer_act = nn.Sigmoid()

# A module without learnable parameters: each loop below has nothing to
# iterate over, so only the section headers are printed.
print("---------.parameters() 方法-------------------")
for param in layer_act.parameters():
    print(param.data.shape)

print("---------.named_parameters() 方法-------------")
for name, param in layer_act.named_parameters():
    print(name, param.data.shape)

print("---------.state_dict() 方法-------------------")
for name, tensor in layer_act.state_dict().items():
    print(type(tensor), f"{name} {tensor.shape}", sep="\n")

print("---------.state_dict() 方法，遍历参数名--------")
for name in layer_act.state_dict().keys():
    print(name)

---------.parameters() 方法-------------------
---------.named_parameters() 方法-------------
---------.state_dict() 方法-------------------
---------.state_dict() 方法，遍历参数名--------


#### B. 获取特定的学习参数

In [None]:
# Three ways to read the weight tensor of `layer_linear`.
# 1) By position: the first item yielded by .parameters()
print(next(iter(layer_linear.parameters())).data)
# 2) By name through .named_parameters()
print(dict(layer_linear.named_parameters())['weight'].data)
# 3) By key in the state dict
print(layer_linear.state_dict()['weight'])

tensor([[-0.5668, -0.3317,  0.3824],
        [ 0.3101,  0.4283,  0.0551],
        [-0.3675,  0.3014,  0.4452],
        [ 0.1134,  0.5319,  0.3761],
        [ 0.4877, -0.0333, -0.3625]])
tensor([[-0.5668, -0.3317,  0.3824],
        [ 0.3101,  0.4283,  0.0551],
        [-0.3675,  0.3014,  0.4452],
        [ 0.1134,  0.5319,  0.3761],
        [ 0.4877, -0.0333, -0.3625]])
tensor([[-0.5668, -0.3317,  0.3824],
        [ 0.3101,  0.4283,  0.0551],
        [-0.3675,  0.3014,  0.4452],
        [ 0.1134,  0.5319,  0.3761],
        [ 0.4877, -0.0333, -0.3625]])


### 2.5.2 获取模块

#### A 遍历所有模块

In [None]:
# A container with a nested container, used to show how names are composed.
contain_seq = nn.Sequential(
    nn.Linear(in_features=3, out_features=5),
    nn.Sigmoid(),
    nn.Sequential(
        nn.Linear(in_features=5, out_features=2),
        nn.Sigmoid(),
    ),
)

print("---------.parameters() 方法-------------------")
for param in contain_seq.parameters():
    print(type(param), param.data.shape, sep="\n")

print("---------.named_parameters() 方法-------------")
for name, param in contain_seq.named_parameters():
    print(type(param), f"{name} {param.data.shape}", sep="\n")

print("---------.state_dict() 方法-------------------")
for name, tensor in contain_seq.state_dict().items():
    print(type(tensor), f"{name} {tensor.shape}", sep="\n")

# .modules() walks every module in the model
print("---------.modules() 方法----------------------")
for module in contain_seq.modules():
    print(type(module), module, sep="\n")

# .named_modules() walks every module together with its name
print("---------.named_modules() 方法----------------")
for name, module in contain_seq.named_modules():
    print(type(module))
    print(name, module)

# .named_children() yields only the direct sub-modules and their names
print("---------.named_children() 方法----------------")
for name, module in contain_seq.named_children():
    print(type(module))
    print(name, module)

---------.parameters() 方法-------------------
<class 'torch.nn.parameter.Parameter'>
torch.Size([5, 3])
<class 'torch.nn.parameter.Parameter'>
torch.Size([5])
<class 'torch.nn.parameter.Parameter'>
torch.Size([2, 5])
<class 'torch.nn.parameter.Parameter'>
torch.Size([2])
---------.named_parameters() 方法-------------
<class 'torch.nn.parameter.Parameter'>
0.weight torch.Size([5, 3])
<class 'torch.nn.parameter.Parameter'>
0.bias torch.Size([5])
<class 'torch.nn.parameter.Parameter'>
2.0.weight torch.Size([2, 5])
<class 'torch.nn.parameter.Parameter'>
2.0.bias torch.Size([2])
---------.state_dict() 方法-------------------
<class 'torch.Tensor'>
0.weight torch.Size([5, 3])
<class 'torch.Tensor'>
0.bias torch.Size([5])
<class 'torch.Tensor'>
2.0.weight torch.Size([2, 5])
<class 'torch.Tensor'>
2.0.bias torch.Size([2])
---------.modules() 方法----------------------
<class 'torch.nn.modules.container.Sequential'>
Sequential(
  (0): Linear(in_features=3, out_features=5, bias=True)
  (1): Sigmoid()
 

#### B. 获取指定的模块

In [None]:
# The first item yielded by .modules() is the container itself.
print(next(iter(contain_seq.modules())))
# A nested sub-module can be looked up by its dotted name.
print(dict(contain_seq.named_modules())['2.0'])

Sequential(
  (0): Linear(in_features=3, out_features=5, bias=True)
  (1): Sigmoid()
  (2): Sequential(
    (0): Linear(in_features=5, out_features=2, bias=True)
    (1): Sigmoid()
  )
)
Linear(in_features=5, out_features=2, bias=True)


#### C. 获取指定模块的学习参数

## 2.6 关闭梯度（冻结层）

# 3 实例：BPNN 神经网络的 4 种构建方法及分析

## 3.1 BPNN 神经网络的 4 种构建方法

定义一个 单隐层神经网络，4 种方式

- 方式 1: 使用 `nn.Sequential()`

- 方式 2: 全部使用 层类（`nn.Module` 类）

- 方式 3：使用函数类型：`torch.sigmoid()` 和 `torch.flatten()`

- 方式 4：使用 `nn.Parameter()`

#### A 方式 1：使用 `nn.Sequential()`

In [None]:
# Variant 1: the whole network lives in a single nn.Sequential.
class BPNNModeler(nn.Module):
    """Single-hidden-layer network wrapped in one ``nn.Sequential``.

    The pipeline is Linear -> Sigmoid -> Linear -> Sigmoid -> Flatten(0, -1),
    so the output is flattened across all dimensions.

    Args:
        input_dim: number of input features.
        hidden_dim: number of hidden units.
        output_dim: number of output units.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()

        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.Sigmoid(),
            nn.Linear(hidden_dim, output_dim),
            nn.Sigmoid(),
            nn.Flatten(0, -1),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run the forward pass through the sequential pipeline."""
        return self.model(x)
# ==============================================================================

#### B 方式 2：全部使用 层类

In [None]:
# Variant 2: every operation is an nn.Module layer object.
class BPNNModeler2(nn.Module):
    """Single-hidden-layer network assembled entirely from ``nn.Module`` layers.

    The forward pass applies Linear -> Sigmoid -> Linear -> Sigmoid and then
    flattens the result across all dimensions, so the output is 1-D.

    Args:
        input_dim: number of input features.
        hidden_dim: number of hidden units.
        output_dim: number of output units.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()

        # Hidden layer
        self.layer1_linear = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.layer1_sigmoid = nn.Sigmoid()
        # Output layer
        self.layer2_linear = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.layer2_sigmoid = nn.Sigmoid()
        # Collapses every dimension (the batch dimension included) into one
        self.layer2_flattern = nn.Flatten(start_dim=0, end_dim=-1)

    def forward(self, x):
        """Run the forward pass and return the flattened network output."""
        hidden = self.layer1_sigmoid(self.layer1_linear(x))
        activated = self.layer2_sigmoid(self.layer2_linear(hidden))
        return self.layer2_flattern(activated)
# ==============================================================================

#### C 方式 3：使用函数类型：`torch.sigmoid()` 和 `torch.flatten()`

In [None]:
# Variant 3: activations and flattening use the functional forms
# torch.sigmoid() and torch.flatten().
class BPNNModeler3(nn.Module):
    """Single-hidden-layer network where only the linear layers are modules.

    The sigmoid activations and the final flatten are applied as tensor
    operations inside ``forward``, so they are not registered as sub-modules.

    Args:
        input_dim: number of input features.
        hidden_dim: number of hidden units.
        output_dim: number of output units.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Hidden layer
        self.layer1_linear = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        # Output layer
        self.layer2_linear = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Run the forward pass and return the output flattened to 1-D."""
        hidden = self.layer1_linear(x).sigmoid()
        activated = self.layer2_linear(hidden).sigmoid()
        return activated.flatten()
# ==============================================================================

#### D 方式 4：使用 `nn.Parameter()`

In [None]:
# Variant 4: raw nn.Parameter tensors instead of nn.Linear layers.
class BPNNModeler4(nn.Module):
    """Single-hidden-layer network built from explicit ``nn.Parameter`` tensors.

    Weights are stored as (in, out) matrices and applied with ``torch.mm``, so
    the input must be a 2-D tensor of shape (batch, input_dim). All parameters
    are initialised with ``torch.rand``.

    Args:
        input_dim: number of input features.
        hidden_dim: number of hidden units.
        output_dim: number of output units.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):

        super(BPNNModeler4, self).__init__()
        # Layer 1
        self.w1 = nn.Parameter(torch.rand((input_dim, hidden_dim)))
        self.b1 = nn.Parameter(torch.rand(hidden_dim))
        # Layer 2
        self.w2 = nn.Parameter(torch.rand((hidden_dim, output_dim)))
        # The bias must match this layer's output width (output_dim). Sizing it
        # with hidden_dim either broadcasts the (batch, output_dim) result to
        # the wrong shape or raises a shape error.
        self.b2 = nn.Parameter(torch.rand(output_dim))

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: 2-D input tensor of shape (batch, input_dim).

        Returns:
            1-D tensor holding the sigmoid outputs flattened across all
            dimensions (batch * output_dim elements).
        """
        # Layer 1
        out_layer1 = torch.mm(x, self.w1) + self.b1
        out_layer1 = torch.sigmoid(out_layer1)
        # Layer 2
        out_layer2 = torch.mm(out_layer1, self.w2) + self.b2
        out_layer2 = torch.sigmoid(out_layer2)
        # Output
        out = torch.flatten(out_layer2)
        return out
# ==============================================================================

## 3.2 区别

In [None]:
def print_info(model):
    """Print what each introspection method of ``model`` yields.

    For parameters(), named_parameters(), state_dict(), modules(),
    named_modules() and named_children(), a header line is printed followed
    by one line per yielded item (its type, its name where available, and its
    size or repr).
    """
    # Each section pairs its header with a lazily evaluated row generator.
    sections = (
        ("---------.parameters() 方法-------------------",
         lambda: ((type(p), p.data.size()) for p in model.parameters())),
        ("---------.named_parameters() 方法-------------",
         lambda: ((type(p), key, p.data.size()) for key, p in model.named_parameters())),
        ("---------.state_dict() 方法-------------------",
         lambda: ((type(t), key, t.size()) for key, t in model.state_dict().items())),
        ("---------.modules() 方法----------------------",
         lambda: ((type(m), m) for m in model.modules())),
        ("---------.named_modules() 方法----------------",
         lambda: ((type(m), key, m) for key, m in model.named_modules())),
        ("---------.named_children() 方法----------------",
         lambda: ((type(m), key, m) for key, m in model.named_children())),
    )

    for header, make_rows in sections:
        print(header)
        for row in make_rows():
            print(*row)

In [None]:
# Network dimensions used for the comparison
INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM = 5, 10, 1


# Instantiate one model per construction variant
model = BPNNModeler(INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM)

model2 = BPNNModeler2(INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM)

model3 = BPNNModeler3(INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM)

model4 = BPNNModeler4(INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM)


# Print the introspection report of each variant, in order
for net in (model, model2, model3, model4):
    print_info(net)

---------.parameters() 方法-------------------
<class 'torch.nn.parameter.Parameter'> torch.Size([10, 5])
<class 'torch.nn.parameter.Parameter'> torch.Size([10])
<class 'torch.nn.parameter.Parameter'> torch.Size([1, 10])
<class 'torch.nn.parameter.Parameter'> torch.Size([1])
---------.named_parameters() 方法-------------
<class 'torch.nn.parameter.Parameter'> model.0.weight torch.Size([10, 5])
<class 'torch.nn.parameter.Parameter'> model.0.bias torch.Size([10])
<class 'torch.nn.parameter.Parameter'> model.2.weight torch.Size([1, 10])
<class 'torch.nn.parameter.Parameter'> model.2.bias torch.Size([1])
---------.state_dict() 方法-------------------
<class 'torch.Tensor'> model.0.weight torch.Size([10, 5])
<class 'torch.Tensor'> model.0.bias torch.Size([10])
<class 'torch.Tensor'> model.2.weight torch.Size([1, 10])
<class 'torch.Tensor'> model.2.bias torch.Size([1])
---------.modules() 方法----------------------
<class '__main__.BPNNModeler'> BPNNModeler(
  (model): Sequential(
    (0): Linear(in