In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Hyper-parameters for a minimal single-channel convolution example.
batch_size = 1
in_channels = 1
out_channels = 1
kernel_size = 3
bias = False
input_size = [batch_size, in_channels, 4, 4]

conv_layer = torch.nn.Conv2d(in_channels, out_channels, kernel_size, bias = bias)
input_feature_map = torch.randn(input_size)
output_feature_map = conv_layer(input_feature_map)

print(input_feature_map)
print(conv_layer.weight)  # weight shape is 1*1*3*3 = out_channels*in_channels*height*width
print(output_feature_map)

# Same convolution through nn.functional, called as a plain function with the layer's weight
output_feature_map1 = F.conv2d(input_feature_map, conv_layer.weight)
print(output_feature_map1)



tensor([[[[-0.5313,  0.2610, -0.7020, -1.0472],
          [ 1.6464, -0.7453,  0.6006, -0.4810],
          [ 0.2911, -0.3596,  0.0160, -0.5955],
          [-0.7642, -0.5402, -0.7338, -0.5263]]]])
Parameter containing:
tensor([[[[ 0.0543, -0.2173, -0.0795],
          [-0.0767,  0.3148, -0.2659],
          [-0.2994, -0.0280, -0.0093]]]], requires_grad=True)
tensor([[[[-0.6276,  0.7368],
          [ 0.3146,  0.2455]]]], grad_fn=<ThnnConv2DBackward>)
tensor([[[[-0.6276,  0.7368],
          [ 0.3146,  0.2455]]]], grad_fn=<ThnnConv2DBackward>)


In [23]:
# 17: hand-written sliding-window multiplication
import math
input = torch.randn(5, 5)
kernel = torch.randn(3, 3)
bias = torch.randn(1) # bias; one element because there is a single output channel

# step1: implement 2-D convolution with plain tensor operations, ignoring batch_size and channel for now
def matrix_multiplication_for_conv2d(input, kernel,bias = 0,  stride = 1, padding = 0):
    """Naive 2-D convolution of one 2-D map with one 2-D kernel (no batch/channel dims).

    Args:
        input: 2-D tensor (height, width).
        kernel: 2-D tensor (kernel_height, kernel_width).
        bias: value added to every output element (number or 1-element tensor).
        stride: step of the sliding window, shared by both axes.
        padding: number of zeros added on every side of ``input`` when > 0.

    Returns:
        2-D tensor with floor((size - kernel_size) / stride) + 1 elements per
        axis, where ``size`` is measured after padding.
    """
    padded = F.pad(input, (padding, padding, padding, padding)) if padding > 0 else input

    rows, cols = padded.shape
    k_rows, k_cols = kernel.shape

    out_rows = math.floor((rows - k_rows) / stride) + 1
    out_cols = math.floor((cols - k_cols) / stride) + 1
    output = torch.zeros(out_rows, out_cols)  # result buffer

    # enumerate() yields the output coordinate alongside the window's top-left corner.
    for out_r, top in enumerate(range(0, rows - k_rows + 1, stride)):
        for out_c, left in enumerate(range(0, cols - k_cols + 1, stride)):
            window = padded[top : top + k_rows, left : left + k_cols]
            # element-wise product of window and kernel, summed, plus bias
            output[out_r, out_c] = (window * kernel).sum() + bias
    return output
# Result from the hand-written implementation
mat_mul_conv_output = matrix_multiplication_for_conv2d(input, kernel, padding = 1,bias = bias)
print(mat_mul_conv_output)

# Result from the PyTorch API; F.conv2d needs 4-D input and weight, so both are reshaped to (1, 1, H, W)
pytorcg_api_conv_output = F.conv2d(input.reshape(1,1, input.shape[0], input.shape[1]),\
    kernel.reshape(1,1,kernel.shape[0],kernel.shape[1]),\
        padding=1,bias = bias)
print(pytorcg_api_conv_output.squeeze(0).squeeze(0))

tensor([[-1.4542, -4.8156,  3.3035,  1.6480,  1.7177],
        [-1.9997,  0.5993,  2.1027,  1.5318, -0.3574],
        [ 1.0011,  9.5139,  0.3971, -0.2130, -0.8853],
        [ 2.7897, -2.0240,  0.3914,  2.1176,  3.5692],
        [ 0.4185,  0.7012,  1.4854,  0.1911, -1.4043]])
tensor([[-1.4542, -4.8156,  3.3035,  1.6480,  1.7177],
        [-1.9997,  0.5993,  2.1027,  1.5318, -0.3574],
        [ 1.0011,  9.5139,  0.3971, -0.2130, -0.8853],
        [ 2.7897, -2.0240,  0.3914,  2.1176,  3.5692],
        [ 0.4185,  0.7012,  1.4854,  0.1911, -1.4043]])


In [26]:

import torch
import torch.nn as nn
import torch.nn.functional as  F
# Demonstrate torch.flatten
a = torch.randn(2, 3)
a

torch.flatten(a) # flatten into a 1-D tensor

# numel: total number of elements in the tensor
a = torch.randn(2, 3)
a.numel()

6

In [28]:
# 18: hand-written sliding-window multiplication
import math
input = torch.randn(5, 5)
kernel = torch.randn(3, 3)
bias = torch.randn(1) # bias; one element because there is a single output channel

# step2: implementation via inner products (flattened regions times flattened kernel)
def matrix_multiplication_for_conv2d_flatten(input, kernel,bias = 0,  stride = 1, padding = 0):
    """2-D convolution expressed as a single matrix product (no batch/channel dims).

    Every sliding-window region of ``input`` is flattened into one row of a
    region matrix; multiplying that matrix by the flattened kernel column
    gives all output values at once.

    Args:
        input: 2-D tensor (height, width).
        kernel: 2-D tensor (kernel_height, kernel_width).
        bias: value added to every output element (number or 1-element tensor).
        stride: step of the sliding window, shared by both axes.
        padding: number of zeros added on every side of ``input`` when > 0.

    Returns:
        2-D tensor with floor((size - kernel_size) / stride) + 1 elements per
        axis, where ``size`` is measured after padding.
    """
    if padding > 0:
        input = F.pad(input, (padding, padding, padding, padding))

    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape

    output_h = math.floor((input_h - kernel_h) / stride) + 1
    output_w = math.floor((input_w - kernel_w) / stride) + 1

    # One row per output element, one column per kernel element.
    region_matrix = torch.zeros(output_h * output_w, kernel.numel())
    kernel_matrix = kernel.reshape(kernel.numel(), 1)

    for out_i, i in enumerate(range(0, input_h - kernel_h + 1, stride)):  # rows
        for out_j, j in enumerate(range(0, input_w - kernel_w + 1, stride)):  # columns
            region = input[i : i + kernel_h, j : j + kernel_w]
            # The row index is built from OUTPUT coordinates, not input offsets,
            # so it stays inside the matrix for any stride.
            region_matrix[out_i * output_w + out_j] = torch.flatten(region)

    # A single matrix product after all regions have been collected.
    output_matrix = region_matrix @ kernel_matrix
    return output_matrix.reshape(output_h, output_w) + bias
# Result from the hand-written (flattened-region) implementation
mat_mul_conv_output_flatten = matrix_multiplication_for_conv2d_flatten(input, kernel, padding = 1,bias = bias)
print(mat_mul_conv_output_flatten)

# Result from the PyTorch API, squeezed back to 2-D so the two can be compared
pytorch_api_conv_output = F.conv2d(input.reshape(1,1, input.shape[0], input.shape[1]),\
    kernel.reshape(1,1,kernel.shape[0],kernel.shape[1]),\
        padding=1,bias = bias).squeeze(0).squeeze(0)
flag = torch.allclose(mat_mul_conv_output_flatten, pytorch_api_conv_output)
flag

tensor([[-0.3577,  3.1594,  5.0397,  3.2003, -0.7363],
        [ 3.1394,  1.6667,  3.8867,  1.7496, -0.1544],
        [ 4.8046, -1.4344, -0.8525,  3.1343,  1.1117],
        [ 1.6232,  0.6599, -3.4425,  3.2378,  0.5643],
        [ 3.8436,  1.8094, -1.5216, -0.3772,  0.2371]])


True

In [40]:
#! https://zhuanlan.zhihu.com/p/555265996
# Introduce the batch_size and channel dimensions
import math
input = torch.randn(2, 2, 5, 5)
kernel = torch.randn(3, 2, 3, 3)
bias = torch.randn(3) # bias, one value per output channel (kernel has 3 output channels)

# step3: 2-D convolution with plain tensor operations, now including the batch_size and channel dimensions
def matrix_multiplication_for_conv2d_full(input, kernel,bias = 0,  stride = 1, padding = 0):
    """Naive 2-D convolution over batched, multi-channel input.

    Args:
        input: 4-D tensor (batch, in_channel, height, width).
        kernel: 4-D tensor (out_channel, in_channel, kernel_height, kernel_width).
        bias: ``None``, a scalar (the default ``0``), a 1-element tensor, or a
            tensor with one value per output channel.
        stride: step of the sliding window, shared by both spatial axes.
        padding: number of zeros added on every side of the two spatial
            dimensions when > 0.

    Returns:
        4-D tensor (batch, out_channel, output_height, output_width).
    """
    if padding > 0:
        # Pad only the last two (spatial) dims; channel and batch dims get 0 padding.
        input = F.pad(input, (padding, padding, padding, padding,0,0,0,0))

    bs, in_channel, input_h, input_w = input.shape
    # As in the loops below, the channel count iterated over is the kernel's.
    out_channel, in_channel, kernel_h, kernel_w = kernel.shape

    if bias is None:
        bias = torch.zeros(out_channel)
    else:
        # Normalise scalars / 1-element tensors to one value per output channel,
        # so the per-channel indexing below works for the default bias = 0 too.
        bias = torch.as_tensor(bias).reshape(-1).expand(out_channel)

    output_h = math.floor((input_h - kernel_h) / stride) + 1
    output_w = math.floor((input_w - kernel_w) / stride) + 1
    output = torch.zeros(bs, out_channel, output_h, output_w)  # result buffer

    for ind in range(bs):
        for oc in range(out_channel):
            for ic in range(in_channel):
                for out_i, i in enumerate(range(0, input_h - kernel_h + 1, stride)):  # rows
                    for out_j, j in enumerate(range(0, input_w - kernel_w + 1, stride)):  # columns
                        region = input[ind, ic, i : i + kernel_h, j : j + kernel_w]
                        # accumulate the contribution of every input channel
                        output[ind, oc, out_i, out_j] += torch.sum(region * kernel[oc, ic])

            # bias is added once per output channel, after all input channels are summed
            output[ind, oc] += bias[oc]
    return output
# Result from the hand-written loop implementation
mat_mul_conv_output = matrix_multiplication_for_conv2d_full(input, kernel, padding = 1,bias = bias,stride = 2)
#print(mat_mul_conv_output)

# Result from the PyTorch API
pytorcg_api_conv_output = F.conv2d(input,kernel,padding=1,bias = bias,stride = 2)
#print(pytorcg_api_conv_output)
# allclose collapses the comparison into a single bool (isclose would give an
# elementwise tensor); the small atol absorbs float32 round-off near zero.
flag = torch.allclose(mat_mul_conv_output, pytorcg_api_conv_output, atol=1e-5)
flag

In [57]:
#step4: implement 2-D convolution by unrolling the kernel into a matrix, then derive transposed convolution from it; batch/channel/padding are ignored and stride = 1
def get_kernel_matrix(kernel, input_size):
    """Unroll ``kernel`` into the matrix that performs a stride-1, unpadded 2-D convolution.

    For each output position (i, j) the kernel is zero-padded to the size of
    the input, with its top-left corner at row i and column j, and then
    flattened into one row of the result.

    Args:
        kernel: 2-D tensor (kernel_height, kernel_width).
        input_size: (input_height, input_width) of the feature map.

    Returns:
        Tensor of shape (output_height * output_width, input_height * input_width),
        with rows in row-major order of the output positions. Multiplying it
        by the flattened input yields the flattened convolution output.
    """
    kernel_h, kernel_w = kernel.shape
    input_h, input_w = input_size
    output_h = input_h - kernel_h + 1
    output_w = input_w - kernel_w + 1
    # rows: number of output elements; columns: number of input elements
    result = torch.zeros((output_h * output_w, input_h * input_w))
    for i in range(output_h):
        for j in range(output_w):
            # F.pad pads the LAST dimension first: (left, right, top, bottom).
            # The column offset j is therefore the left padding and the row
            # offset i is the top padding.
            padded_kernel = F.pad(kernel, (j, input_w - kernel_w - j, i, input_h - kernel_h - i))
            result[i * output_w + j] = padded_kernel.flatten()

    return result
# Test 1: verify 2-D convolution
kernel = torch.randn(3, 3)
input = torch.randn(4, 4) # kernel matrix has 4 rows and 16 columns: 2x2=4 outputs, 4x4=16 inputs
kernel_matrix = get_kernel_matrix(kernel, input.shape)
mm_conov2d_output = kernel_matrix @ input.reshape(-1, 1)  # convolution computed as one matrix product
mm_conov2d_output
pytorch_conv2d_output = F.conv2d(input.unsqueeze(0).unsqueeze(0), kernel.unsqueeze(0).unsqueeze(0))

'''总之，两者都是用来重塑tensor的shape的。
view只适合对满足连续性条件（contiguous）的tensor进行操作，
而reshape同时还可以对不满足连续性条件的tensor进行操作，具有更好的鲁棒性。
view能干的reshape都能干，如果view不能干就可以用reshape来处理。
'''
#print(mm_conov2d_output.reshape(2,2))
#print(pytorch_conv2d_output.squeeze(0).squeeze(0))


# Test 2: verify 2-D transposed convolution (used for upsampling)
'''
把上面得到的kernel_matrix转置变成16x4  4x1，两个矩阵相乘就是16*1也就是4x4
本质就是后向传播y=wx dy/dx=w^T
'''
# Transposed convolution as a matrix product: transposed kernel matrix (16x4) times the conv output (4x1)
mm_transposed_conv2d_output = kernel_matrix.transpose(-1,-2) @ mm_conov2d_output
# the 16x1 column is reshaped back to the 4x4 input size
mm_transposed_conv2d_output= mm_transposed_conv2d_output.reshape(4 , 4)

print(mm_transposed_conv2d_output)

# Reference from PyTorch's functional transposed convolution (cf. torch.nn.ConvTranspose2d)
pytorch_transposed_conv2d_output = F.conv_transpose2d(pytorch_conv2d_output,kernel.unsqueeze(0).unsqueeze(0) )
print(pytorch_transposed_conv2d_output)


tensor([[-4.0064, -7.0201, -1.7938,  0.1585],
        [ 6.6041, -3.2594, -6.4683,  1.4094],
        [ 1.8870, 15.0458,  0.0467, -3.5150],
        [-3.8525, -5.3362,  5.0689, -0.6575]])
tensor([[[[-4.0064, -7.0201, -1.7938,  0.1585],
          [ 6.6041, -3.2594, -6.4683,  1.4094],
          [ 1.8870, 15.0458,  0.0467, -3.5150],
          [-3.8525, -5.3362,  5.0689, -0.6575]]]])


In [60]:
# Dilated (atrous) convolution

import torch
import torch.nn as nn
import torch.nn.functional as F
# 7x7 example map used to illustrate dilated sampling
a = torch.randn(7,7)
a

tensor([[ 1.1445, -0.0805, -0.8505,  0.7367, -0.2037,  1.3674,  0.9186],
        [-0.3654, -1.0221, -0.2708, -0.2582,  1.6529, -0.4387,  0.7612],
        [ 0.9434, -0.6605, -0.4336,  1.6358, -1.1560,  0.6988,  1.2185],
        [-0.4670, -0.3864, -1.8162, -1.4449, -0.7560, -0.9054,  0.7602],
        [ 0.2413, -0.1736,  0.9102, -0.7934,  0.6402, -0.0801, -0.4665],
        [ 0.4164,  0.0931, -0.2639, -0.3033,  1.7619, -0.4075,  0.9078],
        [ 0.2184, -1.6596, -0.8298, -2.0634, -0.5273,  0.1174,  1.3770]])

In [61]:
a[0:3, 0:3] # the region a 3x3 kernel would be applied to
a[0:3, 0:3] # dilation = 1
a[0:5:2, 0:5:2] # dilation = 2: enlarges the receptive field without adding computation
a[0:7:3, 0:7:3] # dilation = 3


tensor([[ 1.1445, -0.0805, -0.8505],
        [-0.3654, -1.0221, -0.2708],
        [ 0.9434, -0.6605, -0.4336]])

In [None]:
# groups > 1: treat one large convolution as several small ones; channels are not fully mixed, only mixed within each group
# Appending a 1*1 pointwise convolution afterwards is enough to mix across groups; this is the ConvMixer approach mentioned earlier
in_channel, out_channel = 2, 4
group = 2
sub_in_channel, sub_out_channel = 1, 2 # 2 groups give 4 kernels in total, half of the 8 kernels needed without grouping; cross-group channel mixing is not covered


In [76]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
def matrix_multiplication_for_conv2d_final(input, kernel, bias = None, stride = 1, padding = 0, dilation = 1, groups = 1):
    """Naive 2-D convolution with stride, padding, dilation and groups.

    Args:
        input: 4-D tensor (batch, in_channel, height, width).
        kernel: 4-D tensor (out_channel, in_channel // groups, kernel_height, kernel_width).
        bias: tensor with one value per output channel, or ``None`` for zeros.
        stride: step of the sliding window, shared by both spatial axes.
        padding: number of zeros added on every side of the two spatial
            dimensions when > 0.
        dilation: spacing between the sampled input elements.
        groups: number of channel groups; must divide both channel counts.

    Returns:
        4-D tensor (batch, out_channel, output_height, output_width).

    Raises:
        AssertionError: if ``groups`` does not divide both channel counts.
    """
    if padding > 0:
        # only the two spatial dims are padded; channel and batch dims get 0
        input = F.pad(input, (padding,padding,padding,padding,0,0,0,0))
    bs, in_channel, input_h, input_w = input.shape
    out_channel, _, kernel_h, kernel_w = kernel.shape

    assert out_channel % groups == 0 and in_channel % groups == 0,"groups必须要同时被通道数整除！"
    group_in = in_channel // groups
    group_out = out_channel // groups

    # Split the channel axes into (group, channel-within-group).
    grouped_input = input.reshape((bs, groups, group_in, input_h, input_w))
    grouped_kernel = kernel.reshape((groups, group_out, group_in, kernel_h, kernel_w))

    # Extent covered by the dilated kernel: the kernel taps plus the holes between them.
    span_h = (kernel_h - 1) * (dilation - 1) + kernel_h
    span_w = (kernel_w - 1) * (dilation - 1) + kernel_w

    output_h = math.floor((input_h - span_h) / stride) + 1
    output_w = math.floor((input_w - span_w) / stride) + 1
    output = torch.zeros((bs, groups, group_out, output_h, output_w))

    if bias is None:
        bias = torch.zeros(out_channel)

    row_starts = range(0, input_h - span_h + 1, stride)
    col_starts = range(0, input_w - span_w + 1, stride)

    for ind in range(bs):                      # batch
        for g in range(groups):                # group
            for oc in range(group_out):        # output channel within the group
                for ic in range(group_in):     # input channel within the group
                    weights = grouped_kernel[g, oc, ic]
                    for out_i, i in enumerate(row_starts):
                        for out_j, j in enumerate(col_starts):
                            # step by `dilation` inside the window to skip the holes
                            region = grouped_input[ind, g, ic, i:i + span_h:dilation, j:j + span_w:dilation]
                            output[ind, g, oc, out_i, out_j] += (region * weights).sum()
                # bias is indexed by the flat output-channel number
                output[ind, g, oc] += bias[g * group_out + oc]

    # merge (group, channel-within-group) back into a single channel axis
    return output.reshape((bs, out_channel, output_h, output_w))

# Verification: compare the hand-written implementation against F.conv2d
kernel_size = 3
bs, in_channel, input_h, input_w = 2, 2, 5, 5
out_channel = 4
groups ,dilation, stride, padding= 2 ,2 ,2, 1
input = torch.randn(bs,in_channel,input_h,input_w)
# with groups, each kernel only spans in_channel//groups input channels
kernel = torch.randn(out_channel,in_channel//groups,kernel_size, kernel_size )
bias = torch.randn(out_channel)

pytorch_conv2d_api_output = \
    F.conv2d(input, kernel, bias=bias,padding=padding,stride=stride,dilation=dilation,groups=groups)

mm_conv2d_final_output = matrix_multiplication_for_conv2d_final\
    (input, kernel, bias = bias, stride = stride, padding = padding, dilation = dilation, groups = groups)

flag = torch.allclose(pytorch_conv2d_api_output, mm_conv2d_final_output)
flag

True