### torch.nn模块

---
### $torch.nn.Parameter$ 类似Tensor，相当于对tensor做了一个包装

In [1]:
import torch
import torch.nn as nn

param = nn.Parameter(torch.zeros(3, 3))  # 相当于对tensor做了一个包装
print(param)
print(param.requires_grad)

Parameter containing:
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], requires_grad=True)
True


In [210]:
param.data

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

参数初始化

In [211]:
param.data.normal_()
param

Parameter containing:
tensor([[ 0.0962, -1.7333,  0.2985],
        [-0.1209,  1.0230,  2.0192],
        [-3.2366, -0.5285,  0.1298]], requires_grad=True)

---

### 卷积：$torch.nn.Conv2d()$

#### 卷积2d（同样有1d、3d） torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros')
* in_channels：输入通道数
* out_channels：输出通道数
* kernel_size：卷积核大小
* stride：步长
* padding：边界填充方式
* dilation：膨胀系数
* groups：组卷积的组大小
* bias：偏置是否存在
* padding_mode：边界填充的值填充方式，有zeros、reflect、replicate、circular，默认zeros
* 其中：kernel_size、stride、padding、dilation，可以是int，h = w = value，或者tuple，即指定h x w两个值不同的大小

In [212]:
x = torch.randn(100, 1, 28, 28)  # N C H W
my_conv = torch.nn.Conv2d(1, 3, 3)
res = my_conv(x)
res.shape

torch.Size([100, 3, 26, 26])

In [213]:
x = torch.randn(100, 1, 28, 28)  # N C H W
my_conv = torch.nn.Conv2d(1, 3, 28)
res = my_conv(x)
res.shape

torch.Size([100, 3, 1, 1])

In [214]:
x = torch.randn(100, 1, 28, 28)  # N C H W
my_conv = torch.nn.Conv2d(1, 3, 3, 2)
res = my_conv(x)
res.shape

torch.Size([100, 3, 13, 13])

---

### 神经网络模块的基类，$torch.nn.Module$

* 构建一个模型，继承自模块
* 通常都是我们的训练模块集成nn.Module

In [215]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 3, (2, 3))
        self.conv2 = nn.Conv2d(3, 2, 3)
        
    def forward(self, x):
        x = self.conv1(x)
        return self.conv2(x)

In [216]:
model = Model()
model

Model(
  (conv1): Conv2d(1, 3, kernel_size=(2, 3), stride=(1, 1))
  (conv2): Conv2d(3, 2, kernel_size=(3, 3), stride=(1, 1))
)

In [217]:
model.conv1.weight  # 默认是凯明初始化

Parameter containing:
tensor([[[[ 0.1981, -0.3854, -0.2998],
          [-0.1299, -0.3651, -0.4050]]],


        [[[-0.0523,  0.3435, -0.2177],
          [ 0.0449,  0.1150, -0.3624]]],


        [[[ 0.0343,  0.3950,  0.2497],
          [-0.2054,  0.3560,  0.3349]]]], requires_grad=True)

In [218]:
model.conv1.bias

Parameter containing:
tensor([-0.2688, -0.3489,  0.1358], requires_grad=True)

In [219]:
model.conv1.weight.shape  # out_channels, in_channels, kenel_size

torch.Size([3, 1, 2, 3])

In [220]:
model.conv2.weight

Parameter containing:
tensor([[[[ 0.0631, -0.1577,  0.0864],
          [ 0.0695,  0.0418, -0.0479],
          [-0.0345,  0.1800, -0.0663]],

         [[ 0.1472,  0.1366, -0.0164],
          [-0.0470,  0.0961, -0.1684],
          [ 0.0367,  0.1496, -0.1313]],

         [[-0.0197, -0.0592,  0.1552],
          [ 0.1634,  0.0754, -0.0310],
          [-0.0071, -0.0795,  0.1060]]],


        [[[-0.0322, -0.1682, -0.1350],
          [-0.1387,  0.0480, -0.1384],
          [ 0.0165,  0.1703, -0.0163]],

         [[ 0.1788, -0.0790, -0.1129],
          [-0.0547, -0.1062,  0.0633],
          [ 0.0904, -0.0729, -0.1688]],

         [[-0.1416, -0.0759, -0.0617],
          [ 0.0667,  0.1315, -0.0902],
          [ 0.0791, -0.0438, -0.0687]]]], requires_grad=True)

In [221]:
model.conv2.bias

Parameter containing:
tensor([ 0.1538, -0.1900], requires_grad=True)

### 通过add_module增加一个模块

In [222]:
model.add_module("conv5", nn.Conv2d(2, 3, 3))

In [223]:
model

Model(
  (conv1): Conv2d(1, 3, kernel_size=(2, 3), stride=(1, 1))
  (conv2): Conv2d(3, 2, kernel_size=(3, 3), stride=(1, 1))
  (conv5): Conv2d(2, 3, kernel_size=(3, 3), stride=(1, 1))
)

### 通过torch.save(obj, f)，保存state_dict为文件

In [187]:
model.cpu()
torch.save(model.state_dict(), "model.pth")
loaded_model = torch.load("model.pth") # map_location默认是存储什么device，加载就是什么device。
# loaded_model

### 移动所有模型参数和缓冲区数据到GPU

In [188]:
model.to("cuda:2")
next(model.parameters())

Parameter containing:
tensor([[[[ 0.2324, -0.3062, -0.2115],
          [ 0.2826,  0.2069, -0.3665]]],


        [[[ 0.3550, -0.1832,  0.1157],
          [-0.0660, -0.1525,  0.0060]]],


        [[[-0.3475,  0.0140,  0.0504],
          [ 0.1541, -0.2424,  0.2314]]]], device='cuda:2', requires_grad=True)

### 使用location改变参数加载的位置

In [189]:
torch.save(model.cuda(3).state_dict(), "model.pth")
loaded_model = torch.load("model.pth", map_location="cpu")  # 将模型直接加载到cpu
loaded_model

OrderedDict([('conv1.weight',
              tensor([[[[ 0.2324, -0.3062, -0.2115],
                        [ 0.2826,  0.2069, -0.3665]]],
              
              
                      [[[ 0.3550, -0.1832,  0.1157],
                        [-0.0660, -0.1525,  0.0060]]],
              
              
                      [[[-0.3475,  0.0140,  0.0504],
                        [ 0.1541, -0.2424,  0.2314]]]])),
             ('conv1.bias', tensor([ 0.1458,  0.1404, -0.1609])),
             ('conv2.weight',
              tensor([[[[ 0.0988,  0.1114, -0.1756],
                        [ 0.1487,  0.0850, -0.1540],
                        [ 0.1552,  0.1587,  0.0191]],
              
                       [[-0.0149, -0.0124, -0.0911],
                        [ 0.0761, -0.0548,  0.0504],
                        [ 0.0675, -0.1690,  0.0053]],
              
                       [[-0.0447,  0.0690, -0.1612],
                        [-0.0191,  0.0155, -0.1424],
                        [-0.078

### 模型推理，Module.forward(*input)

In [190]:
model.cpu()

Model(
  (conv1): Conv2d(1, 3, kernel_size=(2, 3), stride=(1, 1))
  (conv2): Conv2d(3, 2, kernel_size=(3, 3), stride=(1, 1))
  (conv5): Conv2d(2, 3, kernel_size=(3, 3), stride=(1, 1))
)

In [191]:
input = torch.zeros(1, 1, 5, 5)
model(input).shape

torch.Size([1, 2, 2, 1])

In [192]:
input = torch.zeros(1, 1, 5, 5)
model.forward(input)

tensor([[[[-0.0539],
          [-0.0539]],

         [[ 0.0420],
          [ 0.0420]]]], grad_fn=<MkldnnConvolutionBackward>)

---

### 序列容器，torch.nn.Sequential(*args)

In [193]:
model = nn.Sequential(
          nn.Conv2d(1,20,5),
          nn.ReLU(),
          nn.Conv2d(20,64,5),
          nn.ReLU()
        )
model

Sequential(
  (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (1): ReLU()
  (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
  (3): ReLU()
)

In [194]:

model = nn.Sequential(collections.OrderedDict([
          ('conv1', nn.Conv2d(1,20,5)),
          ('relu1', nn.ReLU()),
          ('conv2', nn.Conv2d(20,64,5)),
          ('relu2', nn.ReLU())
        ]))
model

Sequential(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (relu1): ReLU()
  (conv2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
)

---

### 最大池化，torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
* ceil_mode指定对于输出的计算是否向上取整
* 输出的大小：
- Input: $ (N, C, H_{in}, W_{in}) $
- Output: $ (N, C, H_{out}, W_{out}) $
    $$
      H_{out} = \left\lfloor\frac{H_{in} + 2 * \text{padding[0]} - \text{dilation[0]}
            \times (\text{kernel_size[0]} - 1) - 1}{\text{stride[0]}} + 1\right\rfloor
    $$

    $$
      W_{out} = \left\lfloor\frac{W_{in} + 2 * \text{padding[1]} - \text{dilation[1]}
            \times (\text{kernel_size[1]} - 1) - 1}{\text{stride[1]}} + 1\right\rfloor
    $$

In [237]:
x = torch.zeros(1, 1, 3, 3)
m = torch.nn.MaxPool2d(2, stride=2)

# (3 + 2 * 0 - 1 * (2 - 1) - 1) // 2 + 1 = (3 - 1 - 1) // 2 + 1 = 1 // 2 + 1 = 1.5 = 1
m(x).shape

torch.Size([1, 1, 1, 1])

In [196]:
x = torch.zeros(1, 1, 3, 3)
m = torch.nn.MaxPool2d(2, stride=2, ceil_mode=True)

# ceil[(3 + 2 * 0 - 1 * (2 - 1) - 1) / 2] + 1 = ceil[(3 - 1 - 1) / 2] + 1 = ceil[1 / 2] + 1 = 1 + 1 = 2
m(x).shape

torch.Size([1, 1, 2, 2])

---

### 交叉熵损失：torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')
* 多元交叉熵公式：$ Loss = -sum(y * \log(p))$ 通常配合Softmax实现多分类
* 二元交叉熵：$ Loss = -sum(y * \log(p) + (1 - y) * \log(1 - p)) $  通常配合Sigmoid实现二分类

In [3]:
batch = 2
classes = 3
x = torch.randn(batch, classes)  # batch = 2, classes = 3
y = torch.tensor([1, 2])  # batch = 2
m = torch.nn.CrossEntropyLoss()
m(x, y)

tensor(0.6094)

In [204]:
one_hot = torch.nn.functional.one_hot(y, classes)
one_hot

tensor([[0, 1, 0],
        [0, 0, 1]])

In [205]:
-torch.sum(one_hot * torch.log(torch.softmax(x, dim=1))) / batch

tensor(1.8615)

---

### 线性层，torch.nn.Linear(in_features, out_features, bias=True)
bias为False时没有偏置

In [206]:
m = torch.nn.Linear(3, 2)

In [207]:
m.weight.shape

torch.Size([2, 3])

In [208]:
m.bias.shape

torch.Size([2])

---