## 二维卷积代码演示

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from math import floor

定义常量

In [2]:
# Hyper-parameters for the nn.Conv2d demo: one sample, one input channel, one output channel.
batch_size, in_channels, out_channels = 1, 1, 1
kernel_size = 3  # square 3x3 kernel
bias = False  # the demo layer is created without a bias term
# Input shape as [batch, channel, height, width]: a single 4x4 feature map.
input_size = [batch_size, in_channels, 4, 4]

In [3]:
# Module-style convolution configured from the constants defined earlier in the notebook.
conv_layer = nn.Conv2d(
    in_channels=in_channels,
    out_channels=out_channels,
    kernel_size=kernel_size,
    bias=bias,
)
input_feature_map = torch.randn(input_size)
output_feature_map = conv_layer(input_feature_map)

print(input_feature_map)
print(conv_layer.weight)
print('conv shape: ', conv_layer.weight.shape)  # (out_channels, in_channels, kernel_h, kernel_w)
print(output_feature_map)

tensor([[[[ 0.0154,  0.0635, -0.3639, -0.5033],
          [-0.5038, -0.2192, -1.4445, -0.0946],
          [-0.0067, -0.6616, -0.6039, -0.4955],
          [ 0.7326, -1.1545, -0.3996, -0.1910]]]])
Parameter containing:
tensor([[[[ 0.3260, -0.0833,  0.3098],
          [-0.0076,  0.1136, -0.1804],
          [ 0.2704,  0.1957, -0.0588]]]], requires_grad=True)
conv shape:  torch.Size([1, 1, 3, 3])
tensor([[[[ 0.0306, -0.5182],
          [-0.5641, -0.3338]]]], grad_fn=<ConvolutionBackward0>)


In [4]:
# Functional API fed with the layer's own weight tensor; no bias is passed.
output_feature_map1 = F.conv2d(input=input_feature_map, weight=conv_layer.weight)
print(output_feature_map1)

tensor([[[[ 0.0306, -0.5182],
          [-0.5641, -0.3338]]]], grad_fn=<ConvolutionBackward0>)


### 实现卷积

In [5]:
input = torch.randn((5, 5))  # input feature map of the convolution
kernel = torch.randn((3, 3))  # convolution kernel
bias = torch.randn((1,))  # convolution bias; a single output channel is assumed

# One newline-separated print produces the same text as three separate prints.
print(input, kernel, bias, sep='\n')

tensor([[-0.6325, -0.2957,  0.5462, -1.9451, -0.9402],
        [ 0.4959,  1.7904, -0.5853, -0.2096,  0.3875],
        [ 0.9051,  1.3585, -0.2588,  1.7262, -1.3823],
        [ 0.4132,  0.8141,  0.0347, -0.2014, -0.8545],
        [ 0.2931,  0.4612,  1.9812,  1.2929,  0.9103]])
tensor([[-0.7825,  2.0124,  0.4009],
        [-0.0393, -0.4620, -0.4053],
        [ 1.2382, -0.1169,  0.9682]])
tensor([-1.0259])


用原始的矩阵运算实现二维卷积

先不考虑batch_size维度和channel维度

In [6]:
def matrix_multiplication_for_conv2d(input, kernel, bias=0, stride=1, padding=0):
    """Compute a single-channel 2D convolution (cross-correlation) with explicit loops.

    Batch and channel dimensions are not handled by this function.

    Args:
        input: 2D tensor of shape (input_h, input_w).
        kernel: 2D tensor of shape (kernel_h, kernel_w); it does not have to be square.
        bias: value added to every output element (a number or a one-element tensor).
        stride: step of the sliding window, used for both axes.
        padding: number of zero rows/columns added on every side of the input.

    Returns:
        2D tensor of shape
        ((input_h - kernel_h) // stride + 1, (input_w - kernel_w) // stride + 1),
        where input_h and input_w are measured after padding.
    """
    if padding > 0:
        input = F.pad(input, (padding, padding, padding, padding))

    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape

    output_h = (input_h - kernel_h) // stride + 1  # height of the convolution output
    output_w = (input_w - kernel_w) // stride + 1  # width of the convolution output
    output = torch.zeros(output_h, output_w)  # output matrix, filled in below
    for i in range(0, input_h - kernel_h + 1, stride):  # slide over the height
        for j in range(0, input_w - kernel_w + 1, stride):  # slide over the width
            # The window spans kernel_h rows and kernel_w columns.
            region = input[i:i + kernel_h, j:j + kernel_w]
            # Element-wise product summed over the window, plus bias.
            output[i // stride, j // stride] = torch.sum(region * kernel) + bias

    return output

结果验证：

In [7]:
# Result of the hand-written loop implementation.
mat_mul_conv_output = matrix_multiplication_for_conv2d(input, kernel, bias=bias, padding=1, stride=2)
print(mat_mul_conv_output)

# Result of the PyTorch API; F.conv2d takes 4D tensors, so two leading singleton dims are added.
pytorch_api_conv_output = F.conv2d(
    input[None, None],
    kernel[None, None],
    bias=bias,
    stride=2,
    padding=1,
)
print(pytorch_api_conv_output.squeeze())

# Verification: the 2D result is compared against the 4D (1, 1, H, W) reference via broadcasting.
torch.allclose(mat_mul_conv_output, pytorch_api_conv_output)

tensor([[ 1.0617,  1.6041, -0.8199],
        [ 0.4610, -3.5135,  0.3393],
        [-0.1904, -3.1314, -3.0592]])
tensor([[ 1.0617,  1.6041, -0.8199],
        [ 0.4610, -3.5135,  0.3393],
        [-0.1904, -3.1314, -3.0592]])


True

region拉直, flatten版本:

In [8]:
def matrix_multiplication_for_conv2d_flatten(input, kernel, bias=0, stride=1, padding=0):
    """Compute a single-channel 2D convolution as one matrix product.

    Every window covered by the kernel is flattened into a row of a matrix,
    which is then multiplied by the kernel flattened into a column vector.
    Batch and channel dimensions are not handled by this function.

    Args:
        input: 2D tensor of shape (input_h, input_w).
        kernel: 2D tensor of shape (kernel_h, kernel_w); it does not have to be square.
        bias: value added to every output element (a number or a one-element tensor).
        stride: step of the sliding window, used for both axes.
        padding: number of zero rows/columns added on every side of the input.

    Returns:
        2D tensor of shape
        ((input_h - kernel_h) // stride + 1, (input_w - kernel_w) // stride + 1),
        where input_h and input_w are measured after padding.
    """
    if padding > 0:
        input = F.pad(input, (padding, padding, padding, padding))

    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape

    output_h = (input_h - kernel_h) // stride + 1  # height of the convolution output
    output_w = (input_w - kernel_w) // stride + 1  # width of the convolution output

    # One row per output position, one column per kernel element.
    region_matrix = torch.zeros(output_h * output_w, kernel.numel())
    kernel_matrix = kernel.reshape(kernel.numel(), 1)  # kernel as a column vector
    row_index = 0
    for i in range(0, input_h - kernel_h + 1, stride):  # slide over the height
        for j in range(0, input_w - kernel_w + 1, stride):  # slide over the width
            # The window spans kernel_h rows and kernel_w columns.
            region = input[i:i + kernel_h, j:j + kernel_w]
            region_matrix[row_index] = torch.flatten(region)
            row_index += 1

    output_matrix = region_matrix @ kernel_matrix
    # Rows were filled in row-major order, so a plain reshape restores the 2D layout.
    return output_matrix.reshape(output_h, output_w) + bias

In [9]:
# Result of the flatten-based implementation.
mat_mul_conv_output_flatten = matrix_multiplication_for_conv2d_flatten(
    input, kernel, bias=bias, padding=1, stride=2
)
print(mat_mul_conv_output_flatten)

# Result of the PyTorch API; F.conv2d takes 4D tensors, so two leading singleton dims are added.
pytorch_api_conv_output = F.conv2d(
    input[None, None],
    kernel[None, None],
    bias=bias,
    stride=2,
    padding=1,
)
print(pytorch_api_conv_output.squeeze())

# Verification: the 2D result is compared against the 4D (1, 1, H, W) reference via broadcasting.
torch.allclose(mat_mul_conv_output_flatten, pytorch_api_conv_output)

tensor([[ 1.0617,  1.6041, -0.8199],
        [ 0.4610, -3.5135,  0.3393],
        [-0.1904, -3.1314, -3.0592]])
tensor([[ 1.0617,  1.6041, -0.8199],
        [ 0.4610, -3.5135,  0.3393],
        [-0.1904, -3.1314, -3.0592]])


True

考虑batch_size维和channel维度的完整实现

In [10]:
def matrix_multiplication_for_conv2d_full(input, kernel, bias=None, stride=1, padding=0):
    """Compute a batched, multi-channel 2D convolution with explicit loops.

    Args:
        input: 4D tensor of shape (bs, in_channel, input_h, input_w).
        kernel: 4D tensor of shape (output_channel, in_channel, kernel_h, kernel_w);
            the spatial size does not have to be square.
        bias: 1D tensor with one entry per output channel, or None for no bias.
        stride: step of the sliding window, used for both spatial axes.
        padding: number of zero rows/columns added on every side of each feature map.

    Returns:
        4D tensor of shape (bs, output_channel, output_h, output_w).

    Raises:
        ValueError: if the channel counts of `input` and `kernel` differ.
    """
    if padding > 0:
        # Pad only the two spatial axes; the channel and batch axes get zero width.
        input = F.pad(input, (padding, padding, padding, padding, 0, 0, 0, 0))

    bs, in_channel, input_h, input_w = input.shape
    output_channel, kernel_in_channel, kernel_h, kernel_w = kernel.shape
    if kernel_in_channel != in_channel:
        raise ValueError(
            f"input has {in_channel} channels but kernel expects {kernel_in_channel}"
        )

    if bias is None:
        bias = torch.zeros(output_channel)

    output_h = (input_h - kernel_h) // stride + 1  # height of the convolution output
    output_w = (input_w - kernel_w) // stride + 1  # width of the convolution output
    output = torch.zeros(bs, output_channel, output_h, output_w)  # accumulated below

    for idx in range(bs):
        for oc in range(output_channel):
            for ic in range(in_channel):
                for i in range(0, input_h - kernel_h + 1, stride):  # slide over the height
                    for j in range(0, input_w - kernel_w + 1, stride):  # slide over the width
                        # The window spans kernel_h rows and kernel_w columns.
                        region = input[idx, ic, i:i + kernel_h, j:j + kernel_w]
                        # Contributions of all input channels are summed into one output element.
                        output[idx, oc, i // stride, j // stride] += torch.sum(region * kernel[oc, ic])
            # Bias is added once per output channel, after all input channels were accumulated.
            output[idx, oc] += bias[oc]

    return output

In [11]:
input = torch.randn(2, 2, 5, 5)  # (bs, in_channel, in_h, in_w)
kernel = torch.randn(3, 2, 3, 3)  # (out_channel, in_channel, kernel_h, kernel_w)
bias = torch.randn(3)  # one bias value per output channel

# Hand-written implementation and PyTorch reference, called with the same stride and padding.
mm_conv2d_full_output = matrix_multiplication_for_conv2d_full(
    input, kernel, bias=bias, stride=2, padding=1
)
pytorch_conv2d_api_output = F.conv2d(input, kernel, bias=bias, stride=2, padding=1)

torch.allclose(mm_conv2d_full_output, pytorch_conv2d_api_output)

True

对kernel进行展开来实现二维卷积 并推导出转置卷积: 先不考虑batch和channel, 不考虑padding, 假设stride=1

In [12]:
def get_kernel_matrix(kernel, input_size):
    """Unroll `kernel` into a matrix with one zero-padded, flattened kernel copy per row.

    For every position the kernel can take on an input of size `input_size`
    (stride 1, no padding), the kernel is zero-padded to the input size and
    flattened. The rows are stacked in row-major order of the positions, giving
    a matrix of shape (number of output elements, number of input elements).
    """
    kernel_h, kernel_w = kernel.shape
    input_h, input_w = input_size
    slack_h = input_h - kernel_h  # rows left over once the kernel is placed
    slack_w = input_w - kernel_w  # columns left over once the kernel is placed

    result = torch.zeros((slack_h + 1) * (slack_w + 1), input_h * input_w)

    offsets = [(top, left) for top in range(slack_h + 1) for left in range(slack_w + 1)]
    for row, (top, left) in enumerate(offsets):
        # F.pad order for a 2D tensor: (left, right, top, bottom).
        padded_kernel = F.pad(kernel, (left, slack_w - left, top, slack_h - top))
        result[row] = padded_kernel.flatten()

    return result

In [13]:
kernel = torch.randn((3, 3))
input = torch.randn((4, 4))
# 2x2 output positions by 4*4 input elements gives a (4, 16) matrix.
kernel_matrix = get_kernel_matrix(kernel, input_size=input.shape)

print(kernel, kernel_matrix, sep='\n')

tensor([[ 0.0555,  1.1662,  0.1779],
        [ 0.0437, -0.2212, -0.0591],
        [ 0.2425, -0.4909,  1.4868]])
tensor([[ 0.0555,  1.1662,  0.1779,  0.0000,  0.0437, -0.2212, -0.0591,  0.0000,
          0.2425, -0.4909,  1.4868,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0555,  1.1662,  0.1779,  0.0000,  0.0437, -0.2212, -0.0591,
          0.0000,  0.2425, -0.4909,  1.4868,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0555,  1.1662,  0.1779,  0.0000,
          0.0437, -0.2212, -0.0591,  0.0000,  0.2425, -0.4909,  1.4868,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0555,  1.1662,  0.1779,
          0.0000,  0.0437, -0.2212, -0.0591,  0.0000,  0.2425, -0.4909,  1.4868]])


验证二维卷积

In [14]:
# Convolution as a matrix product: unrolled kernel matrix times the input as a column vector.
mm_conv2d_output = kernel_matrix @ input.flatten().unsqueeze(-1)
# Reference result; F.conv2d takes 4D tensors, so two leading singleton dims are added.
pytorch_conv2d_output = F.conv2d(input[None, None], kernel[None, None])

print(mm_conv2d_output, pytorch_conv2d_output, sep='\n')

tensor([[ 1.6035],
        [ 1.4561],
        [-0.0267],
        [ 0.3003]])
tensor([[[[ 1.6035,  1.4561],
          [-0.0267,  0.3003]]]])


验证二维转置卷积

In [15]:
# Transposed convolution as a matrix product: the transposed kernel matrix times the conv output.
mm_transposed_conv2d_output = kernel_matrix.T @ mm_conv2d_output
# Reference result from the PyTorch API, using the same kernel with two leading singleton dims.
pytorch_transposed_conv2d_output = F.conv_transpose2d(pytorch_conv2d_output, kernel[None, None])

print(mm_transposed_conv2d_output.reshape(4, 4), pytorch_transposed_conv2d_output, sep='\n')

tensor([[ 0.0889,  1.9508,  1.9834,  0.2591],
        [ 0.0686, -0.3055, -0.0714, -0.0326],
        [ 0.3876, -0.4150,  1.6045,  2.1471],
        [-0.0065,  0.0859, -0.1871,  0.4464]])
tensor([[[[ 0.0889,  1.9508,  1.9834,  0.2591],
          [ 0.0686, -0.3055, -0.0714, -0.0326],
          [ 0.3876, -0.4150,  1.6045,  2.1471],
          [-0.0065,  0.0859, -0.1871,  0.4464]]]])


实现空洞卷积和群卷积（dilation and group）

In [16]:
def matrix_multiplication_for_conv2d_final(input, kernel, bias=None, stride=1, padding=0, dilation=1, groups=1):
    """Loop-based 2D convolution supporting stride, padding, dilation and groups.

    Args:
        input: 4D tensor of shape (bs, in_channel, input_h, input_w).
        kernel: 4D tensor of shape (out_channel, in_channel // groups, kernel_h, kernel_w).
        bias: 1D tensor with one entry per output channel, or None for no bias.
        stride: step of the sliding window, used for both spatial axes.
        padding: number of zero rows/columns added on every side of each feature map.
        dilation: spacing between the kernel taps, used for both spatial axes.
        groups: number of channel groups; must divide both channel counts.

    Returns:
        4D tensor of shape (bs, out_channel, output_h, output_w).
    """
    if padding > 0:
        # Pad only the two spatial axes; the channel and batch axes get zero width.
        input = F.pad(input, (padding,) * 4 + (0,) * 4)

    bs, in_channel, input_h, input_w = input.shape
    out_channel, _, kernel_h, kernel_w = kernel.shape

    assert out_channel % groups == 0 and in_channel % groups == 0, "groups必须要同时被输入通道数和输出通道数整除!"
    ic_per_group = in_channel // groups
    oc_per_group = out_channel // groups
    # Split the channel axes into (group, channel-within-group).
    grouped_input = input.reshape(bs, groups, ic_per_group, input_h, input_w)
    grouped_kernel = kernel.reshape(groups, oc_per_group, ic_per_group, kernel_h, kernel_w)

    # Footprint of the dilated kernel on the input: dilation * (k - 1) + 1.
    span_h = dilation * (kernel_h - 1) + 1
    span_w = dilation * (kernel_w - 1) + 1

    output_h = (input_h - span_h) // stride + 1
    output_w = (input_w - span_w) // stride + 1

    if bias is None:
        bias = torch.zeros(out_channel)

    output = torch.zeros((bs, groups, oc_per_group, output_h, output_w))

    for idx in range(bs):
        for g in range(groups):
            for oc in range(oc_per_group):
                for ic in range(ic_per_group):
                    weights = grouped_kernel[g, oc, ic]
                    for row in range(output_h):
                        top = row * stride
                        for col in range(output_w):
                            left = col * stride
                            # Take every `dilation`-th element inside the dilated footprint.
                            region = grouped_input[idx, g, ic,
                                                   top:top + span_h:dilation,
                                                   left:left + span_w:dilation]
                            output[idx, g, oc, row, col] += (region * weights).sum()
                # Bias is indexed by the flat output-channel number and added once per channel.
                output[idx, g, oc] += bias[g * oc_per_group + oc]

    # Merge (group, channel-within-group) back into a single channel axis.
    return output.reshape(bs, out_channel, output_h, output_w)

In [17]:
kernel_size = 3
bs, in_channel = 2, 2
input_h = input_w = 7
out_channel = 4
groups = dilation = stride = 2
padding = 1

input = torch.randn(bs, in_channel, input_h, input_w)
# With groups, each kernel only sees in_channel // groups input channels.
kernel = torch.randn(out_channel, in_channel // groups, kernel_size, kernel_size)
bias = torch.randn(out_channel)

pytorch_conv2d_api_output = F.conv2d(
    input, kernel, bias=bias, stride=stride, padding=padding, dilation=dilation, groups=groups
)
mm_conv2d_final_output = matrix_multiplication_for_conv2d_final(
    input, kernel, bias=bias, stride=stride, padding=padding, dilation=dilation, groups=groups
)

print(pytorch_conv2d_api_output, mm_conv2d_final_output, sep='\n')
torch.allclose(pytorch_conv2d_api_output, mm_conv2d_final_output)

tensor([[[[-2.4419e-01, -3.0583e+00,  1.4309e+00],
          [ 1.0033e+00, -2.7890e+00, -5.5060e-01],
          [ 1.2251e+00, -1.1161e+00, -5.0134e-01]],

         [[-6.1742e-01,  1.0594e+00,  5.2264e-02],
          [-4.4655e-01,  5.5588e-01,  1.9635e+00],
          [ 3.4322e-01,  8.1468e-04,  1.9272e+00]],

         [[-3.2600e+00,  1.1603e+00,  5.6671e-01],
          [-3.4365e+00, -1.7850e+00, -2.8568e+00],
          [ 6.0163e-02,  2.3038e+00,  8.2228e-01]],

         [[-5.8294e-01,  3.6316e+00, -1.2523e-01],
          [ 8.6782e-01, -1.0744e+00, -9.0748e-01],
          [ 2.7644e+00, -3.3368e+00,  2.4442e+00]]],


        [[[ 1.1006e+00,  1.8369e+00,  7.0896e-02],
          [-1.6372e+00, -2.9309e+00, -7.3729e-01],
          [ 6.2581e-01, -5.3360e-01, -1.0085e+00]],

         [[ 3.5322e-01,  8.2231e-01,  2.2789e+00],
          [-7.3674e-01, -1.7123e+00, -2.4497e+00],
          [-6.3574e-01,  1.0733e+00,  2.9610e+00]],

         [[-3.6122e-01, -6.8838e-01,  1.6779e-02],
          [-2.667

True