# 二维卷积层

## 二维互相关运算

In [2]:
import torch
from torch import nn

def corr2d(X,K):
    """Compute the 2-D cross-correlation of input ``X`` with kernel ``K``.

    The kernel is placed at every position where it fits entirely inside
    ``X`` (no padding, stride 1). Each output element is the sum of the
    element-wise product between the kernel and the window it covers.

    Args:
        X: 2-D input tensor.
        K: 2-D kernel tensor.

    Returns:
        A tensor of shape ``(X.shape[0] - h + 1, X.shape[1] - w + 1)``,
        where ``(h, w)`` is ``K.shape``. It is allocated with
        ``torch.zeros`` and therefore has the default dtype.
    """
    k_rows, k_cols = K.shape
    out_rows = X.shape[0] - k_rows + 1
    out_cols = X.shape[1] - k_cols + 1
    Y = torch.zeros((out_rows, out_cols))
    for row in range(out_rows):
        for col in range(out_cols):
            # Window of X currently covered by the kernel.
            window = X[row:row + k_rows, col:col + k_cols]
            Y[row, col] = (window * K).sum()
    return Y

In [3]:
# Small 3x3 input and 2x2 kernel used to sanity-check corr2d.
X = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
K = torch.tensor([[0, 1], [2, 3]])
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

## 二维卷积层

二维卷积层将输入和卷积核做互相关运算，并加上一个标量偏差来得到输出。

In [4]:
class Conv2D(nn.Module):
    """Minimal 2-D convolution layer built on top of ``corr2d``.

    The layer owns a kernel (``weight``) of shape ``kernel_size`` and a
    single scalar ``bias``; both are learnable parameters initialised
    from a standard normal distribution.
    """

    def __init__(self,kernel_size):
        """Create the layer.

        Args:
            kernel_size: Shape of the kernel, passed to ``torch.randn``.
        """
        super().__init__()
        self.weight = nn.Parameter(torch.randn(kernel_size))
        self.bias = nn.Parameter(torch.randn(1))

    def forward(self,x):
        """Return the cross-correlation of ``x`` with the kernel, plus the bias."""
        correlated = corr2d(x, self.weight)
        return correlated + self.bias

## 图像中物体边缘检测

In [5]:
# Build a 6x8 "image": ones everywhere except columns 2-5, which are zero.
# This yields two vertical edges (1 -> 0 and 0 -> 1).
X = torch.ones(6, 8)
X[:, 2:6] = 0
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [9]:
# 1x2 kernel: its output is non-zero only where two horizontally adjacent
# elements differ.
K = torch.tensor([[1, -1]])
K.shape

torch.Size([1, 2])

In [10]:
# Cross-correlate the image with the edge kernel; non-zero entries mark the
# positions where horizontally adjacent values change.
Y = corr2d(X, K)
Y

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

## 通过数据学习核数组

In [11]:
# Learn a 1x2 kernel from the (X, Y) pair with plain gradient descent on the
# summed squared error between the layer output and Y.
conv2d = Conv2D(kernel_size=(1, 2))
step = 20
lr = 0.01
for i in range(step):
    Y_hat = conv2d(X)
    loss = ((Y_hat - Y) ** 2).sum()
    loss.backward()

    # Update the parameters in place with autograd recording disabled.
    # This replaces writes through `.data`, which bypass autograd's
    # bookkeeping and are discouraged.
    with torch.no_grad():
        conv2d.weight -= lr * conv2d.weight.grad
        conv2d.bias -= lr * conv2d.bias.grad

    # backward() accumulates into .grad, so reset it before the next step.
    conv2d.weight.grad.zero_()
    conv2d.bias.grad.zero_()
    if (i + 1) % 5 == 0:
        print('Step %d,loss %.3f' % (i + 1, loss.item()))

Step 5,loss 11.466
Step 10,loss 3.169
Step 15,loss 0.880
Step 20,loss 0.245


In [12]:
# Show the learned parameters.
print('weight:', conv2d.weight.data)
print('bias:', conv2d.bias.data)

weight: tensor([[ 0.8764, -0.8724]])
bias: tensor([-0.0022])


## 互相关运算和卷积运算

卷积层为何能使用互相关运算替代卷积运算？其实，在深度学习中核数组都是学出来的：卷积层无论使用互相关运算还是卷积运算，都不影响模型预测时的输出。

## 特征图和感受野

二维卷积层输出的二维数组可以看作是输入在空间维度（宽和高）上某一级的表征，也叫特征图（feature map）。影响元素x的前向计算的所有可能输入区域（可能大于输入的实际尺寸）叫做x的感受野（receptive field）。

可见，我们可以通过更深的卷积神经网络使特征图中单个元素的感受野变得更加广阔，从而捕捉输入上更大尺寸的特征。

# 填充和步幅

## 填充

In [17]:
import torch
from torch import nn

def comp_conv2d(conv2d,X):
    """Apply ``conv2d`` to a 2-D tensor and return a 2-D result.

    ``X`` is viewed with two leading size-1 dimensions before being passed
    to ``conv2d`` (presumably the batch and channel dimensions the layer
    expects). The first two dimensions of the output are dropped again
    before returning.

    Args:
        conv2d: Callable applied to the 4-D view of ``X``.
        X: 2-D input tensor.

    Returns:
        The output of ``conv2d`` viewed with its first two dimensions
        removed.
    """
    batched = X.view(1, 1, *X.shape)
    result = conv2d(batched)
    spatial_shape = result.shape[2:]
    return result.view(spatial_shape)

# Note: padding=1 pads one row/column on EACH side, so two rows and two
# columns are added in total.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)
X = torch.rand(8, 8)
comp_conv2d(conv2d, X).shape

torch.Size([8, 8])

当卷积核的高和宽不同时，我们也可以通过设置高和宽上不同的填充数使输出和输入具有相同的高和宽。

In [18]:
# A 5x3 kernel with padding (2, 1) keeps the 8x8 input size:
# height 8 + 2*2 - 5 + 1 = 8, width 8 + 2*1 - 3 + 1 = 8.
conv2d = nn.Conv2d(
    in_channels=1,
    out_channels=1,
    kernel_size=(5, 3),
    padding=(2, 1),
)
comp_conv2d(conv2d, X).shape

torch.Size([8, 8])

## 步幅

In [19]:
# Stride 2 halves each spatial dimension: floor((8 + 2*1 - 3) / 2) + 1 = 4.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1, stride=2)
comp_conv2d(conv2d, X).shape

torch.Size([4, 4])

In [20]:
# Different kernel size, padding and stride per dimension:
# height floor((8 + 0 - 3) / 3) + 1 = 2, width floor((8 + 2*1 - 5) / 4) + 1 = 2.
conv2d = nn.Conv2d(
    in_channels=1,
    out_channels=1,
    kernel_size=(3, 5),
    padding=(0, 1),
    stride=(3, 4),
)
comp_conv2d(conv2d, X).shape

torch.Size([2, 2])

# 多输入通道和多输出通道

## 多输入通道

In [22]:
import torch
from torch import nn
import sys

def corr2d(X,K):
    """Compute the 2-D cross-correlation of input ``X`` with kernel ``K``.

    The kernel is placed at every position where it fits entirely inside
    ``X`` (no padding, stride 1). Each output element is the sum of the
    element-wise product between the kernel and the window it covers.

    Args:
        X: 2-D input tensor.
        K: 2-D kernel tensor.

    Returns:
        A tensor of shape ``(X.shape[0] - h + 1, X.shape[1] - w + 1)``,
        where ``(h, w)`` is ``K.shape``. It is allocated with
        ``torch.zeros`` and therefore has the default dtype.
    """
    k_rows, k_cols = K.shape
    out_rows = X.shape[0] - k_rows + 1
    out_cols = X.shape[1] - k_cols + 1
    Y = torch.zeros((out_rows, out_cols))
    for row in range(out_rows):
        for col in range(out_cols):
            # Window of X currently covered by the kernel.
            window = X[row:row + k_rows, col:col + k_cols]
            Y[row, col] = (window * K).sum()
    return Y

def corr2d_multi_in(X,K):
    """Cross-correlate a multi-channel input with a multi-channel kernel.

    ``X`` and ``K`` are indexed along their first dimension; each pair of
    2-D slices is cross-correlated with ``corr2d`` and the per-channel
    results are summed. The channel count is taken from ``X.shape[0]``.

    Args:
        X: 3-D input tensor, channels first.
        K: 3-D kernel tensor, channels first.

    Returns:
        The 2-D sum of the per-channel cross-correlations.
    """
    total = corr2d(X[0, :, :], K[0, :, :])
    for channel in range(1, X.shape[0]):
        total += corr2d(X[channel, :, :], K[channel, :, :])
    return total

In [24]:
# Two-channel 3x3 input and a matching two-channel 2x2 kernel.
X = torch.tensor([
    [[0, 1, 2], [3, 4, 5], [6, 7, 8]],
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
])
K = torch.tensor([
    [[0, 1], [2, 3]],
    [[1, 2], [3, 4]],
])
corr2d_multi_in(X, K)

tensor([[ 56.,  72.],
        [104., 120.]])

## 多输出通道

In [25]:
def corr2d_multi_in_out(X,K):
    """Cross-correlate ``X`` with every kernel in ``K`` and stack the results.

    Iterating over ``K`` yields one multi-channel kernel per output
    channel; each is applied to ``X`` with ``corr2d_multi_in``.

    Args:
        X: Multi-channel input passed unchanged to ``corr2d_multi_in``.
        K: Kernel tensor whose first dimension indexes the output channels.

    Returns:
        The per-kernel results stacked along a new first dimension.
    """
    per_output_channel = []
    for kernel in K:
        per_output_channel.append(corr2d_multi_in(X, kernel))
    return torch.stack(per_output_channel)

In [35]:
# Build a kernel with 3 output channels by stacking K, K + 1 and K + 2.
K = torch.tensor([
    [[0, 1], [2, 3]],
    [[1, 2], [3, 4]],
])
K = torch.stack([K, K + 1, K + 2])
K.shape

torch.Size([3, 2, 2, 2])

In [36]:
# One 2-D output per kernel in K.
corr2d_multi_in_out(X, K)

tensor([[[ 56.,  72.],
         [104., 120.]],

        [[ 76., 100.],
         [148., 172.]],

        [[ 96., 128.],
         [192., 224.]]])

## 1X1卷积层

假设我们将通道维当作特征维，将高和宽维度上的元素当成数据样本，那么1x1卷积层的作用与全连接层等价。

In [37]:
def corr2d_multi_in_out_1x1(X,K):
    """Apply a 1x1 multi-channel convolution as a single matrix product.

    The spatial dimensions of ``X`` are flattened so that every pixel
    becomes a column of a ``(c_i, h*w)`` matrix. The kernel is flattened to
    ``(c_o, c_i)``, and their product is reshaped back to ``(c_o, h, w)``.

    Args:
        X: Input tensor of shape ``(c_i, h, w)``. It may be non-contiguous
            (for example a transposed or permuted tensor).
        K: Kernel tensor whose first dimension is ``c_o`` and which holds
            exactly ``c_o * c_i`` elements (e.g. shape ``(c_o, c_i, 1, 1)``).

    Returns:
        Tensor of shape ``(c_o, h, w)``.
    """
    c_i, h, w = X.shape
    c_o = K.shape[0]
    # reshape() is used instead of view(): view() raises on inputs whose
    # memory layout does not allow flattening without a copy.
    X = X.reshape(c_i, h * w)
    K = K.reshape(c_o, c_i)
    Y = torch.mm(K, X)  # the matrix multiplication of a fully connected layer
    return Y.reshape(c_o, h, w)

In [43]:
# Check that the matrix-multiplication implementation agrees with the
# generic multi-channel cross-correlation on a 1x1 kernel.
X = torch.rand(3, 3, 3)
K = torch.rand(2, 3, 1, 1)
Y1 = corr2d_multi_in_out_1x1(X, K)
Y2 = corr2d_multi_in_out(X, K)
print(Y1.shape)
(Y1 - Y2).norm().item() < 1e-6

torch.Size([2, 3, 3])


True

# 池化层

## 二维最大池化层和平均池化层

In [44]:
import torch
from torch import nn

def pool2d(X,pool_size,mode='max'):
    """Apply 2-D max or average pooling (stride 1, no padding).

    Args:
        X: 2-D input tensor; it is converted to float before pooling.
        pool_size: ``(height, width)`` of the pooling window.
        mode: ``'max'`` for max pooling or ``'avg'`` for average pooling.

    Returns:
        Tensor of shape ``(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)``.

    Raises:
        ValueError: If ``mode`` is neither ``'max'`` nor ``'avg'``. An
            unknown mode would otherwise silently produce all zeros.
    """
    if mode not in ('max', 'avg'):
        raise ValueError("mode must be 'max' or 'avg', got %r" % (mode,))
    X = X.float()
    p_h, p_w = pool_size
    Y = torch.zeros(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            window = X[i:i + p_h, j:j + p_w]
            Y[i, j] = window.max() if mode == 'max' else window.mean()
    return Y

In [46]:
# 2x2 max pooling over a 3x3 input.
X = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
pool2d(X, (2, 2))

tensor([[4., 5.],
        [7., 8.]])

In [47]:
# Same window, average pooling instead of max pooling.
pool2d(X, (2, 2), 'avg')

tensor([[2., 3.],
        [5., 6.]])

## 填充和步幅

In [48]:
# 4x4 input holding 0..15, viewed with leading size-1 dimensions so it can be
# fed to nn pooling layers.
X = torch.arange(16, dtype=torch.float).view(1, 1, 4, 4)
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]]]])

In [49]:
# The layer gets its own name so it does not shadow the pool2d() function
# defined earlier in this notebook.
# With only a window size given, MaxPool2d uses a stride equal to the window
# size, so the 4x4 input produces a single output element.
max_pool = nn.MaxPool2d(3)
max_pool(X)

tensor([[[[10.]]]])

In [50]:
# The layer gets its own name so it does not shadow the pool2d() function
# defined earlier in this notebook.
# Padding and stride set explicitly: floor((4 + 2*1 - 3) / 2) + 1 = 2 per side.
max_pool = nn.MaxPool2d(3, padding=1, stride=2)
max_pool(X)

tensor([[[[ 5.,  7.],
          [13., 15.]]]])

In [51]:
# The layer gets its own name so it does not shadow the pool2d() function
# defined earlier in this notebook.
# Non-square window with per-dimension padding and stride:
# height floor((4 + 2*1 - 2) / 2) + 1 = 3, width floor((4 + 2*2 - 4) / 3) + 1 = 2.
max_pool = nn.MaxPool2d((2, 4), padding=(1, 2), stride=(2, 3))
max_pool(X)

tensor([[[[ 1.,  3.],
          [ 9., 11.],
          [13., 15.]]]])

## 多通道

In [52]:
# Stack X and X + 1 along the channel dimension to get a 2-channel input.
# Only the first channel is used as the source, so re-running this cell
# rebuilds the same 2-channel tensor instead of adding channels each time.
X = torch.cat((X[:, :1], X[:, :1] + 1), dim=1)
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]],

         [[ 1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.],
          [ 9., 10., 11., 12.],
          [13., 14., 15., 16.]]]])

In [53]:
# The layer gets its own name so it does not shadow the pool2d() function
# defined earlier in this notebook.
# Pooling is applied to each channel separately, so the output keeps the
# same number of channels as the input.
max_pool = nn.MaxPool2d(3, padding=1, stride=2)
max_pool(X)

tensor([[[[ 5.,  7.],
          [13., 15.]],

         [[ 6.,  8.],
          [14., 16.]]]])