## 二维卷积代码演示

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from math import floor

定义常量

In [2]:
# Hyper-parameters for the nn.Conv2d / F.conv2d comparison in the following cells.
batch_size = 1
in_channels, out_channels = 1, 1
kernel_size = 3
bias = False  # the demo layer is built without a bias term
input_size = [batch_size, in_channels, 4, 4]  # shape handed to torch.randn for the input feature map

In [3]:
# Build the layer first, then draw the input, then run the forward pass.
conv_layer = nn.Conv2d(
    in_channels=in_channels,
    out_channels=out_channels,
    kernel_size=kernel_size,
    bias=bias,
)
input_feature_map = torch.randn(input_size)
output_feature_map = conv_layer(input_feature_map)

print(input_feature_map)
print(conv_layer.weight)
# Weight layout: out_channels * in_channels * height * width
print('conv shape: ', conv_layer.weight.shape)
print(output_feature_map)

tensor([[[[ 0.5804, -1.6252,  0.9928, -0.7315],
          [-0.0361, -0.1881,  0.2918,  0.5103],
          [ 1.3324, -0.4831,  1.2877, -0.4636],
          [ 0.9709, -0.3212, -0.8290,  0.4942]]]])
Parameter containing:
tensor([[[[ 0.2411,  0.3101, -0.1773],
          [ 0.0611,  0.3279,  0.3087],
          [-0.0889, -0.1766,  0.2767]]]], requires_grad=True)
conv shape:  torch.Size([1, 1, 3, 3])
tensor([[[[-0.1906, -0.0254],
          [-0.0572,  0.5159]]]], grad_fn=<ConvolutionBackward0>)


In [4]:
# Same convolution through the functional API, reusing the layer's weight tensor.
output_feature_map1 = F.conv2d(input=input_feature_map, weight=conv_layer.weight)
print(output_feature_map1)

tensor([[[[-0.1906, -0.0254],
          [-0.0572,  0.5159]]]], grad_fn=<ConvolutionBackward0>)


### 实现卷积

In [5]:
# Random single-channel test data for the hand-written convolutions below.
input = torch.randn(5, 5)  # input feature map of the convolution
kernel = torch.randn(3, 3)  # convolution kernel
bias = torch.randn(1)  # convolution bias; one value because a single output channel is assumed

print(input, kernel, bias, sep='\n')

tensor([[ 1.0637,  0.7187,  0.5201,  0.5979, -0.1413],
        [-0.1349, -0.5641, -1.3668,  0.9945, -0.6915],
        [-0.8417, -0.0941, -1.2134,  1.6760, -0.0925],
        [-0.0513,  0.0584, -1.2078,  0.7728, -0.1496],
        [-0.5619, -0.6656, -1.0240,  0.6268,  1.6081]])
tensor([[-0.9862, -0.4309, -1.7411],
        [-1.6268,  1.1117, -1.3009],
        [-1.1477, -1.9942, -0.4259]])
tensor([-0.2516])


用原始的矩阵运算实现二维卷积

先不考虑batch_size维度和channel维度

In [6]:
def matrix_multiplication_for_conv2d(input, kernel, bias=0, stride=1, padding=0):
    """Compute a 2-D convolution (cross-correlation) with explicit sliding windows.

    Batch and channel dimensions are not handled here: both tensors are 2-D.

    Args:
        input: 2-D tensor of shape (input_h, input_w).
        kernel: 2-D tensor of shape (kernel_h, kernel_w); it may be non-square.
        bias: Value added to every output element (number or 1-element tensor).
        stride: Step of the sliding window, shared by both spatial directions.
        padding: Number of zeros added on each of the four sides of ``input``.

    Returns:
        Tensor of shape (output_h, output_w) created with ``torch.zeros``.
    """
    if padding > 0:
        input = F.pad(input, (padding, padding, padding, padding))

    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape

    output_h = (input_h - kernel_h) // stride + 1  # height of the convolution output
    output_w = (input_w - kernel_w) // stride + 1  # width of the convolution output
    output = torch.zeros(output_h, output_w)  # initialise the output matrix

    for i in range(0, input_h - kernel_h + 1, stride):  # slide along the height
        for j in range(0, input_w - kernel_w + 1, stride):  # slide along the width
            # The window must span kernel_w columns (not kernel_h), otherwise
            # non-square kernels cannot be multiplied with the region.
            region = input[i:i + kernel_h, j:j + kernel_w]
            # Element-wise product, summed, plus bias -> one output element.
            output[i // stride, j // stride] = torch.sum(region * kernel) + bias

    return output

结果验证：

In [7]:
# Result of the hand-written sliding-window convolution
mat_mul_conv_output = matrix_multiplication_for_conv2d(input, kernel, bias=bias, padding=1, stride=2)
print(mat_mul_conv_output)

# Reference result from the PyTorch API. F.conv2d works on 4-D tensors, so two
# leading singleton dimensions are added to the 2-D input and kernel.
pytorch_api_conv_output = F.conv2d(input[None, None],
                                   kernel[None, None],
                                   padding=1,
                                   bias=bias,
                                   stride=2)
print(pytorch_api_conv_output.squeeze())

# Compare the two 2-D results directly instead of relying on broadcasting, and use
# an explicit absolute tolerance: with float32 the default atol=1e-8 can report a
# spurious mismatch for outputs that happen to be close to zero.
torch.allclose(mat_mul_conv_output, pytorch_api_conv_output[0, 0], atol=1e-5)  # verification succeeds

tensor([[ 0.5052,  1.3291, -1.1439],
        [ 0.0529, -2.2017, -4.3525],
        [-0.0900, -2.0053, -0.1812]])
tensor([[ 0.5052,  1.3291, -1.1439],
        [ 0.0529, -2.2017, -4.3525],
        [-0.0900, -2.0053, -0.1812]])


True

region拉直, flatten版本:

In [8]:
def matrix_multiplication_for_conv2d_flatten(input, kernel, bias=0, stride=1, padding=0):
    """Compute a 2-D convolution as one matrix product over flattened regions.

    Every window covered by the kernel is flattened into one row of a matrix;
    multiplying that matrix by the flattened kernel (a column vector) yields
    all output elements at once. Batch and channel dimensions are not handled.

    Args:
        input: 2-D tensor of shape (input_h, input_w).
        kernel: 2-D tensor of shape (kernel_h, kernel_w); it may be non-square.
        bias: Value added to every output element (number or 1-element tensor).
        stride: Step of the sliding window, shared by both spatial directions.
        padding: Number of zeros added on each of the four sides of ``input``.

    Returns:
        Tensor of shape (output_h, output_w).
    """
    if padding > 0:
        input = F.pad(input, (padding, padding, padding, padding))

    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape

    output_h = (input_h - kernel_h) // stride + 1  # height of the convolution output
    output_w = (input_w - kernel_w) // stride + 1  # width of the convolution output

    # One row per output element, holding the flattened region under the kernel.
    region_matrix = torch.zeros(output_h * output_w, kernel.numel())
    kernel_matrix = kernel.reshape(kernel.numel(), 1)  # kernel as a column vector
    row_index = 0
    for i in range(0, input_h - kernel_h + 1, stride):  # slide along the height
        for j in range(0, input_w - kernel_w + 1, stride):  # slide along the width
            # The window must span kernel_w columns (not kernel_h) so that its
            # flattened length equals kernel.numel() for non-square kernels.
            region = input[i:i + kernel_h, j:j + kernel_w]
            region_matrix[row_index] = torch.flatten(region)
            row_index += 1

    output_matrix = region_matrix @ kernel_matrix
    return output_matrix.reshape(output_h, output_w) + bias

In [9]:
# Result of the flatten-based convolution
mat_mul_conv_output_flatten = matrix_multiplication_for_conv2d_flatten(input, kernel, bias=bias, padding=1, stride=2)
print(mat_mul_conv_output_flatten)

# Reference result from the PyTorch API. F.conv2d works on 4-D tensors, so two
# leading singleton dimensions are added to the 2-D input and kernel.
pytorch_api_conv_output = F.conv2d(input[None, None],
                                   kernel[None, None],
                                   padding=1,
                                   bias=bias,
                                   stride=2)
print(pytorch_api_conv_output.squeeze())

# Compare the two 2-D results directly instead of relying on broadcasting, and use
# an explicit absolute tolerance: with float32 the default atol=1e-8 can report a
# spurious mismatch for outputs that happen to be close to zero.
torch.allclose(mat_mul_conv_output_flatten, pytorch_api_conv_output[0, 0], atol=1e-5)  # verification succeeds

tensor([[ 0.5052,  1.3291, -1.1439],
        [ 0.0529, -2.2017, -4.3525],
        [-0.0900, -2.0053, -0.1812]])
tensor([[ 0.5052,  1.3291, -1.1439],
        [ 0.0529, -2.2017, -4.3525],
        [-0.0900, -2.0053, -0.1812]])


True

考虑batch_size维和channel维度的完整实现

In [10]:
def matrix_multiplication_for_conv2d_full(input, kernel, bias=None, stride=1, padding=0):
    """Compute a 2-D convolution with batch and channel dimensions using loops.

    Args:
        input: 4-D tensor of shape (batch, in_channel, input_h, input_w).
        kernel: 4-D tensor of shape (out_channel, in_channel, kernel_h, kernel_w);
            the spatial size may be non-square.
        bias: 1-D tensor indexed by output channel, or None for a zero bias.
        stride: Step of the sliding window, shared by both spatial directions.
        padding: Number of zeros added on each side of the two spatial dimensions.

    Returns:
        Tensor of shape (batch, out_channel, output_h, output_w).

    Raises:
        ValueError: If the channel count of ``input`` differs from the
            in_channel dimension of ``kernel``.
    """
    # Both input and kernel are 4-D tensors; only the last two (spatial) dims are padded.
    if padding > 0:
        input = F.pad(input, (padding, padding, padding, padding, 0, 0, 0, 0))

    bs, in_channel, input_h, input_w = input.shape
    output_channel, kernel_in_channel, kernel_h, kernel_w = kernel.shape

    # Previously the kernel's channel count silently replaced the input's, so a
    # mismatch either ignored input channels or failed with an obscure index error.
    if in_channel != kernel_in_channel:
        raise ValueError(
            f"input has {in_channel} channels but kernel expects {kernel_in_channel}"
        )

    if bias is None:
        bias = torch.zeros(output_channel)

    output_h = (input_h - kernel_h) // stride + 1  # height of the convolution output
    output_w = (input_w - kernel_w) // stride + 1  # width of the convolution output
    output = torch.zeros(bs, output_channel, output_h, output_w)  # initialise the output

    for idx in range(bs):
        for oc in range(output_channel):
            for ic in range(in_channel):
                for i in range(0, input_h - kernel_h + 1, stride):  # slide along the height
                    for j in range(0, input_w - kernel_w + 1, stride):  # slide along the width
                        # The window must span kernel_w columns (not kernel_h).
                        region = input[idx, ic, i:i + kernel_h, j:j + kernel_w]
                        # Accumulate the contribution of this input channel.
                        output[idx, oc, i // stride, j // stride] += torch.sum(region * kernel[oc, ic])
            # The bias is added once per output channel, after all input channels.
            output[idx, oc] += bias[oc]

    return output

In [11]:
input = torch.randn(2, 2, 5, 5)  # bs * in_channel * in_h * in_w
kernel = torch.randn(3, 2, 3, 3)  # out_channel * in_channel * kernel_h * kernel_w
bias = torch.randn(3)  # one bias value per output channel

pytorch_conv2d_api_output = F.conv2d(input, kernel, bias, padding=1, stride=2)
mm_conv2d_full_output = matrix_multiplication_for_conv2d_full(input, kernel, bias=bias, padding=1, stride=2)

# The loop implementation and F.conv2d accumulate float32 sums in different orders,
# so outputs that land near zero can exceed the default atol=1e-8 purely through
# rounding; an explicit absolute tolerance keeps the check deterministic.
torch.allclose(mm_conv2d_full_output, pytorch_conv2d_api_output, atol=1e-5)

True

将 kernel 展开成矩阵来实现二维卷积，并由此推导出转置卷积（暂不考虑 batch 和 channel 维度，不考虑 padding，并假设 stride=1）：

In [12]:
def get_kernel_matrix(kernel, input_size):
    """Stack zero-padded, flattened copies of ``kernel`` into one matrix.

    For every position the kernel can take on an input of size ``input_size``
    (stride 1, no padding) the kernel is zero-padded to the input size and
    flattened into one row. The result has one row per output element and one
    column per input element.
    """
    input_h, input_w = input_size
    kernel_h, kernel_w = kernel.shape
    slack_h = input_h - kernel_h  # rows left over once the kernel is placed
    slack_w = input_w - kernel_w  # columns left over once the kernel is placed

    # Rows: number of output elements; columns: number of input elements.
    result = torch.zeros((slack_h + 1) * (slack_w + 1), input_h * input_w)

    offsets = ((top, left) for top in range(slack_h + 1) for left in range(slack_w + 1))
    for row, (top, left) in enumerate(offsets):
        # Pad order is (left, right, top, bottom): embed the kernel at (top, left)
        # inside a zero matrix as large as the input feature map.
        embedded = F.pad(kernel, (left, slack_w - left, top, slack_h - top))
        result[row] = embedded.flatten()

    return result

In [13]:
# Fresh 2-D test data: a 3x3 kernel sliding over a 4x4 input.
kernel = torch.randn(3, 3)
input = torch.randn(4, 4)
# Resulting matrix is 4*16: 4 output elements, 16 input elements.
kernel_matrix = get_kernel_matrix(kernel, input_size=input.shape)

print(kernel, kernel_matrix, sep='\n')

tensor([[ 0.9637, -1.6034, -1.6446],
        [ 2.0865, -0.6464, -0.1253],
        [ 1.0258,  0.3600, -2.2583]])
tensor([[ 0.9637, -1.6034, -1.6446,  0.0000,  2.0865, -0.6464, -0.1253,  0.0000,
          1.0258,  0.3600, -2.2583,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.9637, -1.6034, -1.6446,  0.0000,  2.0865, -0.6464, -0.1253,
          0.0000,  1.0258,  0.3600, -2.2583,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.9637, -1.6034, -1.6446,  0.0000,
          2.0865, -0.6464, -0.1253,  0.0000,  1.0258,  0.3600, -2.2583,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.9637, -1.6034, -1.6446,
          0.0000,  2.0865, -0.6464, -0.1253,  0.0000,  1.0258,  0.3600, -2.2583]])


验证二维卷积

In [14]:
# Convolution as a matrix product: (outputs x inputs) @ (inputs x 1).
input_column = input.reshape(-1, 1)
mm_conv2d_output = torch.matmul(kernel_matrix, input_column)
# Reference result; F.conv2d needs the two leading batch/channel dimensions.
pytorch_conv2d_output = F.conv2d(input[None, None], kernel[None, None])

print(mm_conv2d_output, pytorch_conv2d_output, sep='\n')

tensor([[-3.8122],
        [ 1.2780],
        [-5.0330],
        [-5.7984]])
tensor([[[[-3.8122,  1.2780],
          [-5.0330, -5.7984]]]])


验证二维转置卷积

In [15]:
# Transposed convolution: multiply by the transpose of the kernel matrix, which
# maps the output elements back to a vector with one entry per input element.
mm_transposed_conv2d_output = kernel_matrix.transpose(-1, -2) @ mm_conv2d_output
pytorch_transposed_conv2d_output = F.conv_transpose2d(pytorch_conv2d_output, kernel.unsqueeze(0).unsqueeze(0))

# Reshape to the input's own spatial size rather than a hard-coded 4x4, so the
# check keeps working if the input size chosen earlier is changed.
print(mm_transposed_conv2d_output.reshape(input.shape))
print(pytorch_transposed_conv2d_output)

tensor([[ -3.6737,   7.3438,   4.2203,  -2.1018],
        [-12.8044,   7.6127,  17.2256,   9.3757],
        [-14.4118,  -8.9066,  13.4481,  -2.1595],
        [ -5.1626,  -7.7596,   9.2787,  13.0947]])
tensor([[[[ -3.6737,   7.3438,   4.2203,  -2.1018],
          [-12.8044,   7.6127,  17.2256,   9.3757],
          [-14.4118,  -8.9066,  13.4481,  -2.1595],
          [ -5.1626,  -7.7596,   9.2787,  13.0947]]]])
