## 二维卷积代码演示

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from math import floor

定义常量

In [2]:
# Hyper-parameters shared by the nn.Conv2d / F.conv2d demonstration cells.
in_channels, out_channels = 1, 1  # single-channel input, single-channel output
kernel_size = 3  # square 3x3 kernel
batch_size = 1
bias = False  # build the layer without a bias term
# Input layout: batch * channels * height * width (one 4x4 feature map).
input_size = [batch_size, in_channels, 4, 4]

In [3]:
# Build the conv layer first (its weight initialisation draws random numbers),
# then sample a random input feature map and push it through the layer.
conv_layer = nn.Conv2d(
    in_channels=in_channels,
    out_channels=out_channels,
    kernel_size=kernel_size,
    bias=bias,
)
input_feature_map = torch.randn(*input_size)
output_feature_map = conv_layer(input_feature_map)

print(input_feature_map, conv_layer.weight, sep='\n')
# Weight layout: out_channels * in_channels * height * width
print('conv shape: ', conv_layer.weight.shape)
print(output_feature_map)

tensor([[[[-0.4572,  0.5507,  1.5359, -1.5225],
          [-0.1363, -1.1387,  0.7246,  1.1263],
          [ 0.2960,  0.6730, -1.0714,  1.1075],
          [ 0.4757,  0.7135,  0.5020, -0.1362]]]])
Parameter containing:
tensor([[[[ 0.2842,  0.0535,  0.3133],
          [-0.0275,  0.0215, -0.2678],
          [-0.0581, -0.2202, -0.1530]]]], requires_grad=True)
conv shape:  torch.Size([1, 1, 3, 3])
tensor([[[[ 0.1644, -0.4657],
          [ 0.1590, -0.4014]]]], grad_fn=<ConvolutionBackward0>)


In [4]:
# The functional API fed with the layer's own weight (and no bias) should
# reproduce the output of the nn.Conv2d module above.
output_feature_map1 = F.conv2d(
    input_feature_map,
    conv_layer.weight,
    bias=None,
    stride=1,
    padding=0,
)
print(output_feature_map1)

tensor([[[[ 0.1644, -0.4657],
          [ 0.1590, -0.4014]]]], grad_fn=<ConvolutionBackward0>)


### 实现卷积

In [5]:
# Random 2-D operands for the hand-written convolution.
input = torch.randn((5, 5))  # input feature map
kernel = torch.randn((3, 3))  # convolution kernel
bias = torch.randn((1,))  # convolution bias; a single output channel is assumed

print(input, kernel, bias, sep='\n')

tensor([[ 1.4450,  1.5122,  0.6525, -1.7100,  0.3809],
        [ 0.6517,  0.2388, -0.5335, -0.2180,  1.2641],
        [ 0.8478, -0.1348, -1.5858,  0.1307,  0.0873],
        [-0.5883, -0.1998,  1.1304,  2.1471, -1.2738],
        [-1.4078, -0.7528,  0.0373,  0.6296,  0.3642]])
tensor([[-0.6550, -2.7360, -0.4297],
        [ 0.5924,  0.6039, -0.8319],
        [-0.2285,  0.0837,  1.1828]])
tensor([-1.2358])


用原始的矩阵运算实现二维卷积

先不考虑batch_size维度和channel维度

In [6]:
def matrix_multiplication_for_conv2d(input, kernel, bias=0, stride=1, padding=0):
    if padding > 0:
        input = F.pad(input, (padding, padding, padding, padding))

    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape

    output_h = floor((input_h - kernel_h) / stride + 1)  # 卷积输入的高度
    output_w = floor((input_w - kernel_w) / stride + 1)  # 卷积输入的宽度
    output = torch.zeros(output_h, output_w)  # 初始化输出矩阵
    for i in range(0, input_h - kernel_h + 1, stride):  # 对高度进行遍历
        for j in range(0, input_w - kernel_w + 1, stride):  # 对宽度进行遍历
            region = input[i:i + kernel_h, j:j + kernel_h]  # 取出被核滑动到的区域
            output[int(i / stride), int(j / stride)] = torch.sum(region * kernel) + bias  # 点乘 并赋值给输出位置的元素

    return output

结果验证：

In [7]:
# Result of the hand-written, loop-based convolution.
mat_mul_conv_output = matrix_multiplication_for_conv2d(input, kernel, bias=bias, stride=2, padding=1)
print(mat_mul_conv_output)

# Reference result from the PyTorch API, which expects 4-D operands:
# prepend singleton batch and channel dimensions to the 2-D matrices.
pytorch_api_conv_output = F.conv2d(
    input.unsqueeze(0).unsqueeze(0),
    kernel.unsqueeze(0).unsqueeze(0),
    bias=bias,
    stride=2,
    padding=1,
)
print(pytorch_api_conv_output.squeeze())

# The cell's displayed value: True when both implementations agree.
torch.allclose(mat_mul_conv_output, pytorch_api_conv_output)

tensor([[-1.2841,  1.1195, -1.8631],
        [-2.7829,  1.6947, -5.0187],
        [ 0.2356, -6.0676,  1.4359]])
tensor([[-1.2841,  1.1195, -1.8631],
        [-2.7829,  1.6947, -5.0187],
        [ 0.2356, -6.0676,  1.4359]])


True

region拉直, flatten版本:

In [8]:
def matrix_multiplication_for_conv2d_flatten(input, kernel, bias=0, stride=1, padding=0):
    if padding > 0:
        input = F.pad(input, (padding, padding, padding, padding))

    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape

    output_h = floor((input_h - kernel_h) / stride + 1)  # 卷积输入的高度
    output_w = floor((input_w - kernel_w) / stride + 1)  # 卷积输入的宽度
    output = torch.zeros(output_h, output_w)  # 初始化输出矩阵

    region_matrix = torch.zeros(output.numel(), kernel.numel())  # 存储着拉平后的所有特征区域
    kernel_matrix = kernel.reshape(kernel.numel(), 1)  # kernel的列向量（矩阵）形式
    row_index = 0
    for i in range(0, input_h - kernel_h + 1, stride):  # 对高度进行遍历
        for j in range(0, input_w - kernel_w + 1, stride):  # 对宽度进行遍历
            region = input[i:i + kernel_h, j:j + kernel_h]  # 取出被核滑动到的区域
            region_vector = torch.flatten(region)
            region_matrix[row_index] = region_vector
            row_index += 1

    output_matrix = region_matrix @ kernel_matrix
    output = output_matrix.reshape(output_h, output_w) + bias

    return output

In [9]:
# Result of the flatten-based (single matrix product) convolution.
mat_mul_conv_output_flatten = matrix_multiplication_for_conv2d_flatten(
    input, kernel, bias=bias, stride=2, padding=1
)
print(mat_mul_conv_output_flatten)

# Reference result from the PyTorch API; the 2-D operands are viewed as
# 4-D tensors with singleton batch and channel dimensions.
input_4d = input[None, None]
kernel_4d = kernel[None, None]
pytorch_api_conv_output = F.conv2d(input_4d, kernel_4d, bias=bias, stride=2, padding=1)
print(pytorch_api_conv_output.squeeze())

# The cell's displayed value: True when both implementations agree.
torch.allclose(mat_mul_conv_output_flatten, pytorch_api_conv_output)

tensor([[-1.2841,  1.1195, -1.8631],
        [-2.7829,  1.6947, -5.0187],
        [ 0.2356, -6.0676,  1.4359]])
tensor([[-1.2841,  1.1195, -1.8631],
        [-2.7829,  1.6947, -5.0187],
        [ 0.2356, -6.0676,  1.4359]])


True

考虑batch_size维度和channel维度的完整实现

In [10]:
def matrix_multiplication_for_conv2d_full(input, kernel, bias=None, stride=1, padding=0):
    """Compute a batched, multi-channel 2-D convolution with explicit loops.

    Args:
        input: 4-D tensor of shape (batch, in_channel, input_h, input_w).
        kernel: 4-D tensor of shape (out_channel, in_channel, kernel_h, kernel_w);
            the spatial size need not be square.
        bias: indexable by output channel (e.g. a 1-D tensor of length
            out_channel); ``None`` means a zero bias.
        stride: step of the sliding window, shared by both spatial directions.
        padding: number of zero rows/columns added on every spatial side.

    Returns:
        A tensor of shape (batch, out_channel, output_h, output_w), where
        ``output_h = (input_h + 2 * padding - kernel_h) // stride + 1`` and
        ``output_w`` is computed the same way from the widths.

    Raises:
        ValueError: if ``input`` and ``kernel`` disagree on the number of
            input channels.
    """
    if padding > 0:
        # A 4-element pad on a 4-D tensor touches only the last two (spatial)
        # dimensions: (left, right, top, bottom).
        input = F.pad(input, (padding, padding, padding, padding))

    bs, in_channel, input_h, input_w = input.shape
    output_channel, kernel_in_channel, kernel_h, kernel_w = kernel.shape

    if kernel_in_channel != in_channel:
        # Without this check a mismatch would either ignore input channels
        # silently or fail later with an unrelated indexing error.
        raise ValueError(
            f"input has {in_channel} channel(s) but kernel expects {kernel_in_channel}"
        )

    if bias is None:
        bias = torch.zeros(output_channel)

    output_h = (input_h - kernel_h) // stride + 1  # height of the convolution output
    output_w = (input_w - kernel_w) // stride + 1  # width of the convolution output
    output = torch.zeros(bs, output_channel, output_h, output_w)  # accumulated below

    for idx in range(bs):
        for oc in range(output_channel):
            for ic in range(in_channel):
                for out_i in range(output_h):  # walk over output rows
                    top = out_i * stride
                    for out_j in range(output_w):  # walk over output columns
                        left = out_j * stride
                        # The window spans kernel_h rows and kernel_w columns
                        # (the width must use kernel_w, otherwise non-square
                        # kernels break).
                        region = input[idx, ic, top:top + kernel_h, left:left + kernel_w]
                        # Contributions of all input channels accumulate into
                        # the same output element.
                        output[idx, oc, out_i, out_j] += torch.sum(region * kernel[oc, ic])
            # Add the bias once per (sample, output channel) plane.
            output[idx, oc] += bias[oc]

    return output

In [11]:
# Random 4-D operands for the batched, multi-channel check.
input = torch.randn((2, 2, 5, 5))  # bs * in_channel * in_h * in_w
kernel = torch.randn((3, 2, 3, 3))  # out_channel * in_channel * kernel_h * kernel_w
bias = torch.randn((3,))  # one bias value per output channel

# Reference from the PyTorch API first, then the hand-written implementation.
pytorch_conv2d_api_output = F.conv2d(input, kernel, bias=bias, stride=2, padding=1)
mm_conv2d_full_output = matrix_multiplication_for_conv2d_full(
    input, kernel, bias=bias, stride=2, padding=1
)

# The cell's displayed value: True when both implementations agree.
torch.allclose(mm_conv2d_full_output, pytorch_conv2d_api_output)

True