## PyTorch nn.Conv2d 卷积层使用教程

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

In [2]:
# Hyper-parameters of a minimal single-channel convolution example.
in_channels = 1
out_channels = 1
kernel_size = 3
batch_size = 1
bias = False
input_size = [batch_size, in_channels, 4, 4]  # (batch, channel, height, width)

# Random input feature map.
input_feature_map = torch.randn(input_size)
# Convolution as a module: the layer owns its weight (and bias, when enabled).
conv_layer = nn.Conv2d(in_channels, out_channels, kernel_size, bias=bias)
# Run the convolution through the module interface.
output_feature_map = conv_layer(input_feature_map)
print(output_feature_map)

# The same convolution through the functional interface, reusing the layer's
# parameters. `conv_layer.bias` is None when `bias` is False, so forwarding it
# keeps both calls equivalent regardless of how `bias` is set above.
output_feature_map = F.conv2d(input_feature_map, conv_layer.weight, conv_layer.bias)
print(output_feature_map)


tensor([[[[ 0.1191, -0.1261],
          [-0.0245,  0.3192]]]], grad_fn=<ConvolutionBackward0>)
tensor([[[[ 0.1191, -0.1261],
          [-0.0245,  0.3192]]]], grad_fn=<ConvolutionBackward0>)


## 手写并验证二维卷积

### step1 滑动窗口实现二维卷积，不考虑batch size维度和channel维度

In [34]:
input = torch.randn((5, 5))   # input feature map of the convolution
kernel = torch.randn((3, 3))  # convolution kernel
bias = torch.randn((1,))      # convolution bias; a single output channel is assumed

def matrix_multiplication_for_conv2d(input, kernel, bias=0, stride=1, padding=0):
    """Compute a single-channel 2-D convolution with an explicit sliding window.

    Args:
        input: 2-D tensor (height, width), without batch or channel dimensions.
        kernel: 2-D tensor (kernel height, kernel width).
        bias: Scalar or one-element tensor added to every output element.
        stride: Step of the window along both spatial dimensions.
        padding: Number of zeros added to each of the four sides of ``input``.

    Returns:
        2-D tensor of shape (output height, output width). Its dtype is the
        promotion of the input/kernel dtype with the default float dtype, so
        float64 operands keep their full precision.
    """
    if padding > 0:
        # F.pad lists the pads of the last dimension first: left, right, top, bottom.
        input = F.pad(input, (padding, padding, padding, padding))

    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape

    # Number of window positions along each axis.
    output_h = (input_h - kernel_h) // stride + 1
    output_w = (input_w - kernel_w) // stride + 1

    # Allocate the result in a dtype wide enough for the operands and on the
    # input's device, instead of always using the default float32 on the CPU.
    out_dtype = torch.promote_types(
        torch.result_type(input, kernel), torch.get_default_dtype()
    )
    output = torch.zeros(output_h, output_w, dtype=out_dtype, device=input.device)

    # Iterate over output positions directly, so indices stay integers.
    for row in range(output_h):
        top = row * stride
        for col in range(output_w):
            left = col * stride
            region = input[top:top + kernel_h, left:left + kernel_w]
            # Element-wise product of the window and the kernel, summed up.
            output[row, col] = torch.sum(region * kernel) + bias

    return output

# Result of the hand-written sliding-window convolution.
mat_mul_conv_output = matrix_multiplication_for_conv2d(
    input, kernel, bias=bias, stride=2, padding=1
)

# Reference result from the PyTorch API. F.conv2d works on 4-D operands, so
# size-1 batch and channel dimensions are added and squeezed away afterwards.
pytorch_api_conv_output = F.conv2d(
    input.unsqueeze(0).unsqueeze(0),
    kernel.unsqueeze(0).unsqueeze(0),
    bias=bias,
    stride=2,
    padding=1,
).squeeze()

# Check that the sliding-window version agrees with the PyTorch API.
compare_result = torch.allclose(mat_mul_conv_output, pytorch_api_conv_output)
print(compare_result)

True


### step2 向量乘法实现二维卷积，只flatten卷积区域版本，不考虑batch size维度和channel维度

In [39]:
input = torch.randn(size=(5, 5))   # input feature map of the convolution
kernel = torch.randn(size=(3, 3))  # convolution kernel
bias = torch.randn(size=(1,))      # convolution bias; a single output channel is assumed

def matrix_multiplication_for_conv2d_flatten(input, kernel, bias=0, stride=1, padding=0):
    """Compute a single-channel 2-D convolution as one matrix product.

    Every window covered by the kernel is flattened into a row of a region
    matrix; multiplying that matrix by the flattened kernel (a column vector)
    yields all output elements at once.

    Args:
        input: 2-D tensor (height, width), without batch or channel dimensions.
        kernel: 2-D tensor (kernel height, kernel width).
        bias: Scalar or one-element tensor added to every output element.
        stride: Step of the window along both spatial dimensions.
        padding: Number of zeros added to each of the four sides of ``input``.

    Returns:
        2-D tensor of shape (output height, output width).
    """
    if padding > 0:
        # F.pad lists the pads of the last dimension first: left, right, top, bottom.
        input = F.pad(input, (padding, padding, padding, padding))

    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape

    # Number of window positions along each axis.
    output_h = (input_h - kernel_h) // stride + 1
    output_w = (input_w - kernel_w) // stride + 1

    # Both matmul operands must share one dtype; pick one wide enough for the
    # input and the kernel rather than hard-wiring the default float32.
    dtype = torch.promote_types(
        torch.result_type(input, kernel), torch.get_default_dtype()
    )
    # One row per output position, holding the flattened window.
    region_matrix = torch.zeros(
        output_h * output_w, kernel.numel(), dtype=dtype, device=input.device
    )
    # The kernel as a column vector (in matrix form).
    kernel_matrix = kernel.reshape((-1, 1)).to(dtype)

    for row in range(output_h):
        top = row * stride
        for col in range(output_w):
            left = col * stride
            region = input[top:top + kernel_h, left:left + kernel_w]
            # Rows are stored in row-major order of the output positions.
            region_matrix[row * output_w + col] = torch.flatten(region)

    # (positions, kernel elements) @ (kernel elements, 1) -> (positions, 1)
    output_matrix = region_matrix @ kernel_matrix
    return output_matrix.reshape((output_h, output_w)) + bias

# Result of the matrix-multiplication convolution that flattens only the
# regions covered by the kernel.
mat_mul_conv_output_flatten = matrix_multiplication_for_conv2d_flatten(
    input, kernel, bias=bias, stride=2, padding=1
)

# Reference result from the PyTorch API. F.conv2d works on 4-D operands, so
# size-1 batch and channel dimensions are added and squeezed away afterwards.
pytorch_api_conv_output = F.conv2d(
    input.unsqueeze(0).unsqueeze(0),
    kernel.unsqueeze(0).unsqueeze(0),
    bias=bias,
    stride=2,
    padding=1,
).squeeze()

# Check that the region-flattening version agrees with the PyTorch API.
compare_result = torch.allclose(mat_mul_conv_output_flatten, pytorch_api_conv_output)
print(compare_result)

True


### step3 张量乘法实现二维卷积，flatten整个input版本，完整版本

In [79]:
input = torch.randn((4, 3, 5, 5))    # input feature map: (batch, channel, height, width)
kernel = torch.randn((64, 3, 3, 3))  # kernel: (out channel, in channel, height, width)
bias = torch.randn((64,))            # one bias value per output channel

def matrix_multiplication_for_conv2d_full(input, kernel, bias=None, stride=1, padding=0):
    """Compute a batched multi-channel 2-D convolution as one tensor product.

    Instead of extracting windows from the input, the whole (padded) input is
    flattened and multiplied by a sparse "kernel matrix". That matrix has one
    column per output position, holding the kernel weights at the input
    locations the window covers and zeros elsewhere.

    Args:
        input: 4-D tensor (batch, in channel, height, width).
        kernel: 4-D tensor (out channel, in channel, kernel height, kernel
            width). Its in-channel count is the one used to build the kernel
            matrix, so it has to match the input's channel count.
        bias: Optional 1-D tensor with one value per output channel.
        stride: Step of the window along both spatial dimensions.
        padding: Number of zeros added to each side of both spatial dimensions.

    Returns:
        4-D tensor (batch, out channel, output height, output width).
    """
    if padding > 0:
        # Pads are listed from the last dimension backwards: width, height,
        # channel, batch. Only the two spatial dimensions are padded.
        input = F.pad(input, (padding, padding, padding, padding, 0, 0, 0, 0))

    bs, _, input_h, input_w = input.shape
    out_channel, in_channel, kernel_h, kernel_w = kernel.shape

    # Number of window positions along each axis.
    output_h = (input_h - kernel_h) // stride + 1
    output_w = (input_w - kernel_w) // stride + 1
    num_positions = output_h * output_w

    # Both matmul operands must share one dtype; pick one wide enough for the
    # input and the kernel rather than hard-wiring the default float32.
    dtype = torch.promote_types(
        torch.result_type(input, kernel), torch.get_default_dtype()
    )

    # Kept 5-D while filling so that a single sliced assignment writes the
    # weights of every output channel for a given window position.
    kernel_matrix = torch.zeros(
        (out_channel, in_channel, input_h, input_w, num_positions),
        dtype=dtype,
        device=input.device,
    )
    for row in range(output_h):
        top = row * stride
        for col in range(output_w):
            left = col * stride
            # Place the kernel weights where this window overlaps the input.
            kernel_matrix[
                :, :, top:top + kernel_h, left:left + kernel_w, row * output_w + col
            ] = kernel
    kernel_matrix = kernel_matrix.reshape((out_channel, -1, num_positions))

    # (bs, 1, 1, C*H*W) @ (out_channel, C*H*W, positions): matmul broadcasts
    # the leading dimensions, so no explicit repeat over batch or output
    # channel is needed.
    input_matrix = input.reshape((bs, 1, 1, -1)).to(dtype)
    output = torch.matmul(input_matrix, kernel_matrix).reshape(
        (bs, out_channel, output_h, output_w)
    )

    if bias is not None:
        output = output + bias.reshape((1, -1, 1, 1))

    return output

# Result of the matrix-multiplication convolution that flattens the whole input.
mat_mul_conv_output_full = matrix_multiplication_for_conv2d_full(
    input, kernel, bias=bias, stride=2, padding=1
)

# Reference result from the PyTorch API.
pytorch_api_conv_output = F.conv2d(input, kernel, bias=bias, stride=2, padding=1)

# Check that the whole-input version agrees with the PyTorch API.
compare_result = torch.allclose(
    mat_mul_conv_output_full,
    pytorch_api_conv_output,
    rtol=1e-5,
    atol=1e-5,
)
print(compare_result)

True
