## 二维卷积层
---

### 二维互相关运算

In [1]:
# 【输出矩阵计算公式】： n − k + 1
import torch
from torch import nn

def corr2d(X, K):
    """Compute the 2D cross-correlation of ``X`` with the kernel ``K``.

    The kernel is slid over every position where it fits entirely inside
    ``X`` (no padding, stride 1). Each output element is the sum of the
    element-wise product of the kernel and the window beneath it.

    Args:
        X: 2D input tensor.
        K: 2D kernel tensor.

    Returns:
        A tensor of shape ``(X.shape[0] - K.shape[0] + 1,
        X.shape[1] - K.shape[1] + 1)`` created with ``torch.zeros`` (so it
        has the default dtype) and filled with the window sums.
    """
    k_rows, k_cols = K.shape
    out_rows = X.shape[0] - k_rows + 1
    out_cols = X.shape[1] - k_cols + 1
    Y = torch.zeros((out_rows, out_cols))
    for r in range(out_rows):
        for c in range(out_cols):
            # Window of X currently covered by the kernel.
            window = X[r: r + k_rows, c: c + k_cols]
            Y[r, c] = torch.sum(window * K)
    return Y

In [2]:
# Small worked example: a 3x3 input and a 2x2 kernel give a 2x2 output.
X = torch.tensor([[0,1,2],[3,4,5],[6,7,8]])
K = torch.tensor([[0,1],[2,3]]) # convolution kernel, also called a filter
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

### 二维卷积层

In [3]:
class Conv2D(nn.Module):
    """Minimal 2D convolution layer built on ``corr2d``.

    Holds a learnable kernel (``weight``) and a learnable scalar offset
    (``bias``), both initialised from a standard normal distribution.
    """

    def __init__(self, kernel_size):
        """Create the layer.

        Args:
            kernel_size: Shape of the kernel, passed straight to
                ``torch.randn`` (e.g. ``(1, 2)``).
        """
        super().__init__()
        # Draw the kernel first and the bias second so the random stream is
        # consumed in the same order on every construction.
        initial_kernel = torch.randn(kernel_size)
        self.weight = nn.Parameter(initial_kernel)
        initial_bias = torch.randn(1)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x):
        """Cross-correlate ``x`` with the kernel, then add the bias."""
        correlated = corr2d(x, self.weight)
        return correlated + self.bias

### 图像中物体边缘检测
构造一张 6×8 的图像（即高和宽分别为 6 像素和 8 像素的图像）。它中间 4 列为黑（0），其余为白（1）。

In [4]:
# Build the 6x8 test image: start all ones (white), then set columns 2..5 to 0 (black).
X = torch.ones(6, 8)
X[:, 2:6] = 0
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [5]:
# 1x2 kernel: under corr2d each output is X[i, j] - X[i, j + 1], the
# difference between horizontally adjacent elements.
K = torch.tensor([[1, -1]])

In [6]:
Y = corr2d(X, K)
Y # white-to-black edges are detected as 1, black-to-white edges as -1

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

#### 通过数据学习核数组

In [7]:
# Learn the 1x2 kernel from data: fit a Conv2D layer so that conv2d(X)
# reproduces the edge map Y computed earlier.
conv2d = Conv2D(kernel_size=(1,2))

step = 20
lr = 0.01
for i in range(step):
    Y_hat = conv2d(X) # forward pass: current prediction
    l = ((Y_hat - Y) ** 2).sum() # sum of squared errors
    l.backward() # backpropagate to populate .grad on weight and bias

    # Plain gradient-descent step. The update runs under no_grad so it is
    # not recorded in the autograd graph; this is the supported replacement
    # for mutating `.data`, which bypasses autograd's in-place checks.
    with torch.no_grad():
        conv2d.weight -= lr * conv2d.weight.grad
        conv2d.bias -= lr * conv2d.bias.grad

    # backward() accumulates into .grad, so reset it before the next step.
    conv2d.weight.grad.zero_()
    conv2d.bias.grad.zero_()
    if (i + 1) % 5 == 0:
        print('Step %d, loss %.3f' % (i+1, l.item()))

Step 5, loss 0.602
Step 10, loss 0.136
Step 15, loss 0.034
Step 20, loss 0.009


In [8]:
# Show the learned parameters; in the recorded run the kernel ends up close
# to the [[1, -1]] kernel used to generate Y, with a bias close to 0.
print("weight: ", conv2d.weight.data)
print("bias: ", conv2d.bias.data)

weight:  tensor([[ 1.0217, -1.0263]])
bias:  tensor([0.0026])


## 填充和步幅

In [9]:
def comp_cov2d(conv2d, X):
    """Run a convolution layer on a single 2D tensor and return a 2D result.

    Args:
        conv2d: Callable layer applied to the reshaped input.
        X: 2D input tensor.

    Returns:
        The layer output viewed with its first two dimensions dropped.
    """
    # Prepend two size-1 dimensions: batch size and channel count.
    batched = X.view(1, 1, *X.shape)
    out = conv2d(batched)
    # Drop the batch and channel dimensions, which are not of interest here.
    return out.view(out.shape[2:])

# 3x3 kernel with padding 1: the 8x8 input keeps its shape (8 + 2*1 - 3 + 1 = 8).
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)

X = torch.rand(8, 8)
comp_cov2d(conv2d, X).shape

torch.Size([8, 8])

In [10]:
# Input shape, for comparison with the output shape above.
X.shape

torch.Size([8, 8])

In [11]:
# Output size with padding, per dimension: n + 2p - k + 1
# (n: input size, p: padding, k: kernel size)
# Kernel of height 5 and width 3 | padding of 2 along the height and 1 along the width, on both sides
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(5, 3), padding=(2, 1))
comp_cov2d(conv2d, X).shape

torch.Size([8, 8])

In [12]:
# Height padding reduced to 1: output height is 8 + 2*1 - 5 + 1 = 6, width stays 8.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(5, 3), padding=(1, 1))
comp_cov2d(conv2d, X).shape

torch.Size([6, 8])

### 步幅
![strided_formula](./img/Snipaste_2021-09-26_22-00-23.png)

In [13]:
# Stride 2 in both dimensions: floor((8 + 2*1 - 3) / 2) + 1 = 4 per dimension.
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=2)
comp_cov2d(conv2d, X).shape

torch.Size([4, 4])

In [14]:
# Different kernel, padding and stride per dimension:
#   height: floor((8 + 2*0 - 3) / 3) + 1 = 2
#   width:  floor((8 + 2*1 - 5) / 4) + 1 = 2
conv2d = nn.Conv2d(1, 1, kernel_size=(3, 5), padding=(0, 1), stride=(3, 4))
comp_cov2d(conv2d, X).shape

torch.Size([2, 2])

torch.Size([5, 3])