# 卷积层

In [1]:
import torch
from torch import nn

### 互相关运算

In [16]:
def corr2d(X, K):
    """Compute the 2-D cross-correlation of ``X`` with the kernel ``K``.

    The kernel is slid over every position at which it fits entirely
    inside ``X`` (no padding, step of one element). Each output element is
    the sum of the element-wise product between the kernel and the window
    of ``X`` it currently covers.

    Args:
        X: Input tensor, indexed as ``(row, column)``.
        K: Kernel tensor, indexed as ``(row, column)``.

    Returns:
        A tensor created by ``torch.zeros`` with shape
        ``(X.shape[0] - K.shape[0] + 1, X.shape[1] - K.shape[1] + 1)``
        filled with the cross-correlation values.
    """
    # Axis 0 counts rows (height), axis 1 counts columns (width).
    k_rows = K.shape[0]
    k_cols = K.shape[1]
    out_rows = X.shape[0] - k_rows + 1
    out_cols = X.shape[1] - k_cols + 1
    Y = torch.zeros((out_rows, out_cols))
    for i in range(out_rows):
        for j in range(out_cols):
            # Window of X under the kernel when its top-left corner is (i, j).
            Y[i, j] = (X[i:i + k_rows, j:j + k_cols] * K).sum()
    return Y
# Small integer input (3x3) and kernel (2x2) used to try out corr2d.
x = torch.arange(0, 9).view(3, 3)
k = torch.arange(0, 4).view(2, 2)

In [15]:
# Cross-correlate the 3x3 input x with the 2x2 kernel k.
corr2d(x, k)

tensor([[19., 25.],
        [37., 43.]])

### 二维卷积层

In [18]:
class Conv2D(nn.Module):
    """Minimal 2-D convolutional layer built on top of ``corr2d``.

    The layer owns a learnable kernel (``weight``) and a learnable
    one-element bias (``bias``).
    """

    def __init__(self, kernel_size):
        """Create the layer's parameters.

        Args:
            kernel_size: Shape handed to ``torch.rand`` to draw the
                initial kernel values.
        """
        super().__init__()
        initial_kernel = torch.rand(kernel_size)
        initial_bias = torch.zeros(1)
        self.weight = nn.Parameter(initial_kernel)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x):
        """Return the cross-correlation of ``x`` with the kernel, plus the bias."""
        correlated = corr2d(x, self.weight)
        return correlated + self.bias

### 卷积层的简单应用：检测图像中的边缘

In [19]:
# Build a 6x8 "image": ones everywhere except a band of zeros in columns 2-5.
X = torch.ones(6, 8)
X[:, 2:6] = 0.0
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [23]:
# 1x2 kernel: each output is the left element minus its right-hand neighbour.
K = torch.tensor([[1.0, -1.0]])

In [27]:
# Cross-correlate the image with the 1x2 kernel: the output is non-zero only
# in the columns where horizontally adjacent pixels differ (the edges).
Y = corr2d(X, K)
Y

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

### 给定X和Y去学K

In [53]:
# Learn a 1x2 kernel that maps X to Y with plain gradient descent.
# One input channel, one output channel, no bias term.
conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)

# nn.Conv2d works on (batch, channels, height, width) tensors.
X = X.reshape(1, 1, 6, 8)
Y = Y.reshape(1, 1, 6, 7)

lr = 3e-2  # learning rate of the manual gradient-descent step
for i in range(10):
    conv2d.zero_grad()
    y_hat = conv2d(X)
    l = (Y - y_hat) ** 2
    loss = l.sum()  # reduce once; reused for backward() and for logging
    loss.backward()
    # Apply the step in place without recording it in the autograd graph,
    # instead of reaching around autograd through ``.data``.
    with torch.no_grad():
        conv2d.weight -= lr * conv2d.weight.grad
    if (i + 1) % 2 == 0:
        print(f'batch {i + 1}, loss {loss:.3f}')

batch 2, loss 8.251
batch 4, loss 1.973
batch 6, loss 0.572
batch 8, loss 0.195
batch 10, loss 0.073


In [54]:
# Inspect the learned kernel; Y was produced with the kernel [1, -1],
# so the trained weights should end up close to those values.
conv2d.weight.data

tensor([[[[ 1.0145, -0.9604]]]])

# 填充和步幅

### 增加填充
padding为1表示上下各填充1行、左右各填充1列；padding为(2, 1)表示上下各填充2行、左右各填充1列

In [59]:
# A 3x3 kernel with one row/column of padding on every side keeps the
# 8x8 spatial size of the input unchanged.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)
X = torch.rand(8, 8).view(1, 1, 8, 8)  # (batch, channels, height, width)
Y = conv2d(X)
print(Y.shape)

torch.Size([1, 1, 8, 8])


In [60]:
# Asymmetric case: a 5x3 kernel padded by 2 rows (top and bottom) and
# 1 column (left and right) also leaves the height and width unchanged.
conv2d = nn.Conv2d(
    in_channels=1,
    out_channels=1,
    kernel_size=(5, 3),
    padding=(2, 1),
)
Y = conv2d(X)
print(Y.shape)

torch.Size([1, 1, 8, 8])


### 步幅
输出形状为：$$outHeight = \lfloor (n_{h} - k_{h} + p_{h} + s_{h}) / s_{h} \rfloor$$
填充的大小一般为：$$p_{h} = k_{h} - 1$$（这里的 $p_{h}$ 是上下两侧填充行数之和；`nn.Conv2d` 的 `padding` 参数指的是每一侧的填充数，即 $p_{h} / 2$）

In [61]:
# Same 5x3 kernel and padding as before, now moving 2 elements per step
# along both height and width.
conv2d = nn.Conv2d(
    in_channels=1,
    out_channels=1,
    kernel_size=(5, 3),
    padding=(2, 1),
    stride=2,
)
Y = conv2d(X)
print(Y.shape)

torch.Size([1, 1, 4, 4])


# 多输入输出通道

### 多输入通道

In [96]:
def corr2d_multi_in(X, K):
    """Cross-correlate a multi-channel input with a multi-channel kernel.

    ``X`` and ``K`` are iterated in lockstep; each pair is passed to
    ``corr2d`` and the per-channel results are added together.

    Args:
        X: Iterable of per-channel inputs.
        K: Iterable of per-channel kernels, paired with ``X`` by position.

    Returns:
        The sum of the per-channel ``corr2d`` results.
    """
    total = 0  # same starting value the built-in sum() uses
    for channel, kernel in zip(X, K):
        total = total + corr2d(channel, kernel)
    return total
    
# Two 3x3 input channels: A holds 1..9 and B holds 0..8.
A = torch.arange(1, 10).reshape(3, 3)
B = torch.arange(9).reshape(3, 3)
X = [A, B]
# One 2x2 kernel per input channel, stacked along a new leading axis.
k1 = torch.arange(1, 5).reshape(2, 2)
k2 = torch.arange(4).reshape(2, 2)
K = torch.stack([k1, k2], dim=0)

print(corr2d_multi_in(X, K))

tensor([[ 56.,  72.],
        [104., 120.]])


### 多输入输出通道

In [97]:
def corr2d_multi_in_out(X, K):
    """Cross-correlate a multi-channel input with several multi-channel kernels.

    Each element obtained by iterating over ``K`` is used as the kernel
    argument of ``corr2d_multi_in``; the results are stacked along a new
    leading axis, one slice per element of ``K``.

    Args:
        X: Multi-channel input forwarded unchanged to ``corr2d_multi_in``.
        K: Iterable of multi-channel kernels, one per output channel.

    Returns:
        A tensor whose leading axis indexes the output channels.
    """
    per_output_channel = []
    for kernel in K:
        per_output_channel.append(corr2d_multi_in(X, kernel))
    return torch.stack(per_output_channel, dim=0)

# Build three output-channel kernels from K by adding 0, 1 and 2 to it,
# stacked along a new leading axis.
K = torch.stack([K, K + 1, K + 2], dim=0)
K.shape

torch.Size([3, 2, 2, 2])

In [98]:
# Apply every output-channel kernel in K to the multi-channel input X.
corr2d_multi_in_out(X, K)

tensor([[[ 56.,  72.],
         [104., 120.]],

        [[ 76., 100.],
         [148., 172.]],

        [[ 96., 128.],
         [192., 224.]]])