## Pytorch nn.Conv2d卷积网络使用教程

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

In [2]:
# Hyper-parameters of the demo layer.
in_channels, out_channels = 1, 1
kernel_size = 3
batch_size = 1
bias = False
input_size = [batch_size, in_channels, 4, 4]

# Random input feature map; drawn before the layer is built so the RNG
# consumption order stays (input, then layer weights).
input_feature_map = torch.randn(input_size)

# Module interface: the layer owns its own randomly initialised weight.
conv_layer = nn.Conv2d(
    in_channels=in_channels,
    out_channels=out_channels,
    kernel_size=kernel_size,
    bias=bias,
)
output_feature_map = conv_layer(input_feature_map)
print(output_feature_map)

# Functional interface: same computation, but the weight is passed explicitly.
output_feature_map = F.conv2d(input_feature_map, conv_layer.weight)
print(output_feature_map)


tensor([[[[0.8740, 0.0408],
          [0.8836, 0.6468]]]], grad_fn=<ConvolutionBackward0>)
tensor([[[[0.8740, 0.0408],
          [0.8836, 0.6468]]]], grad_fn=<ConvolutionBackward0>)


## 手写并验证二维卷积

### step1 滑动窗口实现二维卷积，不考虑batch size维度和channel维度

In [3]:
input = torch.randn(5, 5) # input feature map of the convolution (2-D, no batch/channel axes)
kernel = torch.randn(3, 3) # convolution kernel
bias = torch.randn(1) # convolution bias; a single value because there is one output channel

def matrix_multiplication_for_conv2d(input, kernel, bias=0, stride=1, padding=0):
    """Single-channel 2-D convolution (cross-correlation) via a sliding window.

    Args:
        input: 2-D tensor of shape (height, width).
        kernel: 2-D tensor of shape (kernel_h, kernel_w).
        bias: Scalar or one-element tensor added to every output element.
        stride: Step of the window along both axes.
        padding: Number of zero rows/columns added on every side of ``input``.

    Returns:
        2-D tensor of shape (output_h, output_w) with the dtype and device
        of ``input``.
    """
    if padding > 0:
        # F.pad order for the two trailing axes: (left, right, top, bottom).
        input = F.pad(input, (padding, padding, padding, padding))

    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape

    # Integer floor division avoids the float round trip of floor(a / b).
    output_h = (input_h - kernel_h) // stride + 1
    output_w = (input_w - kernel_w) // stride + 1

    # Allocate the result like the input so float64 (or non-CPU) inputs are
    # not silently truncated to the default float32 CPU tensor.
    output = torch.zeros(output_h, output_w, dtype=input.dtype, device=input.device)

    for out_i in range(output_h):
        top = out_i * stride
        for out_j in range(output_w):
            left = out_j * stride
            # Window currently covered by the kernel.
            region = input[top:top + kernel_h, left:left + kernel_w]
            # Element-wise product summed over the window, plus the bias.
            output[out_i, out_j] = torch.sum(region * kernel) + bias

    return output

# Result of the hand-written sliding-window convolution.
mat_mul_conv_output = matrix_multiplication_for_conv2d(input, kernel, bias, stride=2, padding=1)

# Reference result from the PyTorch API. F.conv2d wants 4-D operands, so two
# leading singleton axes are added to the 2-D input and kernel and squeezed
# away again afterwards.
pytorch_api_conv_output = F.conv2d(
    input[None, None],
    kernel[None, None],
    bias=bias,
    stride=2,
    padding=1,
).squeeze()

# The sliding-window version must agree with the PyTorch API.
compare_result = torch.allclose(mat_mul_conv_output, pytorch_api_conv_output)
print(compare_result)

True


### step2 向量乘法实现二维卷积，只flatten卷积区域版本，不考虑batch size维度和channel维度

In [4]:
input = torch.randn(5, 5) # input feature map of the convolution (2-D, no batch/channel axes)
kernel = torch.randn(3, 3) # convolution kernel
bias = torch.randn(1) # convolution bias; a single value because there is one output channel

def matrix_multiplication_for_conv2d_flatten(input, kernel, bias=0, stride=1, padding=0):
    """Single-channel 2-D convolution as one matrix product over flattened windows.

    Every window covered by the kernel is flattened into one row of a matrix;
    multiplying that matrix by the flattened kernel (a column vector) yields
    all output elements at once.

    Args:
        input: 2-D tensor of shape (height, width).
        kernel: 2-D tensor of shape (kernel_h, kernel_w).
        bias: Scalar or one-element tensor added to every output element.
        stride: Step of the window along both axes.
        padding: Number of zero rows/columns added on every side of ``input``.

    Returns:
        2-D tensor of shape (output_h, output_w).
    """
    if padding > 0:
        # F.pad order for the two trailing axes: (left, right, top, bottom).
        input = F.pad(input, (padding, padding, padding, padding))

    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape

    output_h = (input_h - kernel_h) // stride + 1
    output_w = (input_w - kernel_w) // stride + 1

    # One row per output position. Allocated with the input's dtype/device:
    # a default float32 buffer would make the matmul below fail for float64.
    region_matrix = torch.zeros(
        output_h * output_w, kernel.numel(), dtype=input.dtype, device=input.device
    )
    kernel_matrix = kernel.reshape((-1, 1))  # kernel as a column vector

    for out_i in range(output_h):
        top = out_i * stride
        for out_j in range(output_w):
            left = out_j * stride
            region = input[top:top + kernel_h, left:left + kernel_w]
            # Row-major position of this window in the output.
            region_matrix[out_i * output_w + out_j] = torch.flatten(region)

    output_matrix = region_matrix @ kernel_matrix
    return output_matrix.reshape((output_h, output_w)) + bias

# Result of the matrix-product convolution that flattens each covered window.
mat_mul_conv_output_flatten = matrix_multiplication_for_conv2d_flatten(
    input, kernel, bias, stride=2, padding=1
)

# Reference result from the PyTorch API. F.conv2d wants 4-D operands, so two
# leading singleton axes are added to the 2-D input and kernel and squeezed
# away again afterwards.
pytorch_api_conv_output = F.conv2d(
    input[None, None],
    kernel[None, None],
    bias=bias,
    stride=2,
    padding=1,
).squeeze()

# The flattened-window version must agree with the PyTorch API.
compare_result = torch.allclose(mat_mul_conv_output_flatten, pytorch_api_conv_output)
print(compare_result)

True


### step3 张量乘法实现二维卷积，flatten整个input版本，完整版本

In [5]:
input = torch.randn(4, 3, 5, 5) # input feature map: (batch, in_channel, height, width)
kernel = torch.randn(64, 3, 3, 3) # convolution kernel: (out_channel, in_channel, kernel_h, kernel_w)
bias = torch.randn(64) # one bias value per output channel

def matrix_multiplication_for_conv2d_full(input, kernel, bias=None, stride=1, padding=0):
    """Batched multi-channel 2-D convolution as a single matrix product.

    The kernel is unrolled into a matrix with one column per output position;
    each column is the kernel placed at that position inside an otherwise
    zero (in_channel, height, width) volume. Multiplying the flattened input
    by this matrix yields every output element.

    Args:
        input: Tensor of shape (batch, in_channel, height, width).
        kernel: Tensor of shape (out_channel, in_channel, kernel_h, kernel_w).
        bias: Optional tensor of shape (out_channel,); zeros when ``None``.
        stride: Step of the window along both spatial axes.
        padding: Zero padding added on every side of both spatial axes.

    Returns:
        Tensor of shape (batch, out_channel, output_h, output_w).
    """
    if padding > 0:
        # Only the two trailing (spatial) axes are padded.
        input = F.pad(input, (padding, padding, padding, padding))

    bs, in_channel, input_h, input_w = input.shape
    out_channel, in_channel, kernel_h, kernel_w = kernel.shape
    # Work tensors follow the input's dtype/device; default float32 buffers
    # would make the matmul fail for float64 inputs.
    tensor_opts = {"dtype": input.dtype, "device": input.device}
    if bias is None:
        bias = torch.zeros(out_channel, **tensor_opts)

    output_h = (input_h - kernel_h) // stride + 1
    output_w = (input_w - kernel_w) // stride + 1
    num_positions = output_h * output_w

    # Unrolled kernel, kept 5-D while it is filled so that all output
    # channels are written in one assignment per position.
    kernel_matrix = torch.zeros(
        (out_channel, in_channel, input_h, input_w, num_positions), **tensor_opts
    )
    for out_i in range(output_h):
        top = out_i * stride
        for out_j in range(output_w):
            left = out_j * stride
            kernel_matrix[
                :, :, top:top + kernel_h, left:left + kernel_w, out_i * output_w + out_j
            ] = kernel
    kernel_matrix = kernel_matrix.reshape(
        (out_channel, in_channel * input_h * input_w, num_positions)
    )

    # (bs, 1, 1, C*H*W) @ (out_channel, C*H*W, P) broadcasts to
    # (bs, out_channel, 1, P); no explicit repeat of either operand is needed.
    input_matrix = input.reshape((bs, 1, 1, -1))
    output = torch.matmul(input_matrix, kernel_matrix)
    output = output.reshape((bs, out_channel, output_h, output_w))

    return output + bias.reshape((1, -1, 1, 1))

# Arguments shared by the hand-written and the reference convolution.
conv_call_args = dict(bias=bias, stride=2, padding=1)

# Result of the matrix-product convolution that flattens the whole input.
mat_mul_conv_output_full = matrix_multiplication_for_conv2d_full(input, kernel, **conv_call_args)

# Reference result from the PyTorch API.
pytorch_api_conv_output = F.conv2d(input, kernel, **conv_call_args)

# Both results must agree up to float32 rounding.
compare_result = torch.allclose(
    mat_mul_conv_output_full,
    pytorch_api_conv_output,
    rtol=1e-5,
    atol=1e-5,
)
print(compare_result)

True


## 手写并验证转置卷积

### 转置卷积的张量转置乘法版本

In [6]:
# Bias for the transposed convolution: 3 values, matching the size of the
# kernel's second dimension, which is the channel count of the result.
bias = torch.randn(3)

def transpose_conv_matrix_multiplication(conv_output, kernel, stride=1, padding=0, output_padding=0, bias=None):
    """Transposed 2-D convolution via the transpose of the unrolled kernel matrix.

    A forward convolution can be written as ``flat_input @ K``; the
    transposed convolution multiplies by ``K`` transposed instead, mapping a
    tensor shaped like the convolution output back to the input shape.

    Args:
        conv_output: Tensor of shape (batch, out_channel, output_h, output_w).
        kernel: Tensor of shape (out_channel, in_channel, kernel_h, kernel_w).
        stride: Stride of the corresponding forward convolution.
        padding: Padding of the corresponding forward convolution; that many
            rows/columns are cropped from every side of the result.
        output_padding: Extra rows/columns kept at the bottom/right of the
            result. It only enlarges the cropping window and is not a zero
            padding of the final tensor.
        bias: Optional tensor of shape (in_channel,); zeros when ``None``.

    Returns:
        Tensor of shape (batch, in_channel, H, W) with
        ``H = (output_h - 1) * stride + kernel_h - 2 * padding + output_padding``
        and likewise for ``W``.
    """
    bs, out_channel, output_h, output_w = conv_output.shape
    out_channel, in_channel, kernel_h, kernel_w = kernel.shape
    # Spatial size of the un-cropped result (the padded forward input).
    input_h = (output_h - 1) * stride + kernel_h
    input_w = (output_w - 1) * stride + kernel_w
    num_positions = output_h * output_w

    # Work tensors follow conv_output's dtype/device.
    tensor_opts = {"dtype": conv_output.dtype, "device": conv_output.device}
    if bias is None:
        bias = torch.zeros(in_channel, **tensor_opts)

    # Unrolled forward kernel; all output channels are filled per position.
    kernel_matrix = torch.zeros(
        (out_channel, in_channel, input_h, input_w, num_positions), **tensor_opts
    )
    for out_i in range(output_h):
        top = out_i * stride
        for out_j in range(output_w):
            left = out_j * stride
            kernel_matrix[
                :, :, top:top + kernel_h, left:left + kernel_w, out_i * output_w + out_j
            ] = kernel

    # The "transpose" step: (out_c, in_c, H*W, P) -> (in_c, out_c*P, H*W).
    kernel_matrix = kernel_matrix.reshape(
        (out_channel, in_channel, input_h * input_w, num_positions)
    )
    transposed_kernel = kernel_matrix.permute(1, 0, 3, 2).reshape(
        (in_channel, out_channel * num_positions, input_h * input_w)
    )

    # (bs, 1, 1, out_c*P) @ (in_c, out_c*P, H*W) broadcasts to (bs, in_c, 1, H*W).
    output_matrix = conv_output.reshape((bs, 1, 1, -1))
    full = torch.matmul(output_matrix, transposed_kernel)
    full = full.reshape((bs, in_channel, input_h, input_w))

    # output_padding extends the un-cropped map at the bottom/right *before*
    # the bias and the crop, so the extra positions carry real values.
    if output_padding > 0:
        full = F.pad(full, (0, output_padding, 0, output_padding))
    full = full + bias.reshape((1, -1, 1, 1))

    return full[
        :,
        :,
        padding:input_h + output_padding - padding,
        padding:input_w + output_padding - padding,
    ]

# Arguments shared by the hand-written and the reference transposed convolution.
transpose_call_args = dict(stride=2, padding=1, bias=bias)

# Hand-written transposed convolution (transposed kernel-matrix version).
my_transpose_conv2d_output = transpose_conv_matrix_multiplication(
    pytorch_api_conv_output, kernel, **transpose_call_args
)

# Reference result from the PyTorch API.
pytorch_transpose_conv2d_output = F.conv_transpose2d(
    pytorch_api_conv_output, kernel, **transpose_call_args
)

# Both results must agree up to float32 rounding.
compare_result = torch.allclose(
    my_transpose_conv2d_output,
    pytorch_transpose_conv2d_output,
    rtol=1e-5,
    atol=1e-5,
)
print(compare_result)
    

True


### 转置卷积的分层叠加版本

In [7]:
# Bias for the transposed convolution: 3 values, matching the size of the
# kernel's second dimension, which is the channel count of the result.
bias = torch.randn(3)

def transpose_conv_block_stacking(conv_output, kernel, stride=1, padding=0, output_padding=0, bias=None):
    """Transposed 2-D convolution by stacking scaled copies of the kernel.

    Every element of ``conv_output`` scales the kernel of its channel, and the
    scaled kernel is accumulated into the result at the window that element
    came from in the forward convolution.

    Args:
        conv_output: Tensor of shape (batch, out_channel, output_h, output_w).
        kernel: Tensor of shape (out_channel, in_channel, kernel_h, kernel_w).
        stride: Stride of the corresponding forward convolution.
        padding: Padding of the corresponding forward convolution; that many
            rows/columns are cropped from every side of the result.
        output_padding: Extra rows/columns kept at the bottom/right of the
            result. It only enlarges the cropping window and is not a zero
            padding of the final tensor.
        bias: Optional tensor of shape (in_channel,); zeros when ``None``.

    Returns:
        Tensor of shape (batch, in_channel, H, W) with
        ``H = (output_h - 1) * stride + kernel_h - 2 * padding + output_padding``
        and likewise for ``W``.
    """
    bs, out_channel, output_h, output_w = conv_output.shape
    out_channel, in_channel, kernel_h, kernel_w = kernel.shape
    # Spatial size of the un-cropped result (the padded forward input).
    input_h = (output_h - 1) * stride + kernel_h
    input_w = (output_w - 1) * stride + kernel_w

    # Work tensors follow conv_output's dtype/device.
    tensor_opts = {"dtype": conv_output.dtype, "device": conv_output.device}
    if bias is None:
        bias = torch.zeros(in_channel, **tensor_opts)

    # The accumulator already includes the output_padding rows/columns at the
    # bottom/right, so they receive the bias (and any overlapping kernel
    # contributions once the crop window shifts) instead of plain zeros.
    full = torch.zeros(
        (bs, in_channel, input_h + output_padding, input_w + output_padding),
        **tensor_opts,
    )

    for out_i in range(output_h):
        top = out_i * stride
        for out_j in range(output_w):
            left = out_j * stride
            # Contract the out_channel axis for the whole batch at once:
            # (bs, out_c) x (out_c, in_c, kh, kw) -> (bs, in_c, kh, kw).
            patch = torch.einsum("bo,oihw->bihw", conv_output[:, :, out_i, out_j], kernel)
            full[:, :, top:top + kernel_h, left:left + kernel_w] += patch

    full += bias.reshape((1, -1, 1, 1))

    return full[
        :,
        :,
        padding:input_h + output_padding - padding,
        padding:input_w + output_padding - padding,
    ]

# Arguments shared by the hand-written and the reference transposed convolution.
transpose_call_args = dict(stride=2, padding=1, bias=bias)

# Hand-written transposed convolution (kernel-stacking version).
my_transpose_conv2d_output = transpose_conv_block_stacking(
    pytorch_api_conv_output, kernel, **transpose_call_args
)

# Reference result from the PyTorch API.
pytorch_transpose_conv2d_output = F.conv_transpose2d(
    pytorch_api_conv_output, kernel, **transpose_call_args
)

# Both results must agree up to float32 rounding.
compare_result = torch.allclose(
    my_transpose_conv2d_output,
    pytorch_transpose_conv2d_output,
    rtol=1e-5,
    atol=1e-5,
)
print(compare_result)
    
            

True


### 转置卷积的卷积版本

In [8]:
# Bias for the transposed convolution: 3 values, matching the size of the
# kernel's second dimension, which is the channel count of the result.
bias = torch.randn(3)

def transpose_conv_conv(conv_output, kernel, stride=1, padding=0, output_padding=0, bias=None):
    """Transposed 2-D convolution expressed as an ordinary stride-1 convolution.

    ``conv_output`` is dilated by inserting ``stride - 1`` zeros between its
    elements, padded, and then convolved with the spatially flipped kernel
    whose channel axes are swapped.

    Args:
        conv_output: Tensor of shape (batch, out_channel, output_h, output_w).
        kernel: Tensor of shape (out_channel, in_channel, kernel_h, kernel_w);
            the kernel does not have to be square.
        stride: Stride of the corresponding forward convolution.
        padding: Padding of the corresponding forward convolution.
        output_padding: Extra rows/columns produced at the bottom/right of the
            result; implemented as extra padding of the dilated map so those
            positions hold real convolution values plus bias.
        bias: Optional tensor of shape (in_channel,).

    Returns:
        Tensor of shape (batch, in_channel, H, W) with
        ``H = (output_h - 1) * stride + kernel_h - 2 * padding + output_padding``
        and likewise for ``W``. The result tracks gradients only if one of
        the arguments does.
    """
    bs, out_channel, output_h, output_w = conv_output.shape
    out_channel, in_channel, kernel_h, kernel_w = kernel.shape
    # Size of the zero-dilated map that the stride-1 convolution runs over.
    map_h = (output_h - 1) * stride + 1
    map_w = (output_w - 1) * stride + 1
    # Padding that makes the stride-1 convolution produce the full result;
    # computed per axis so that non-square kernels are handled correctly.
    p_h = kernel_h - 1 - padding
    p_w = kernel_w - 1 - padding

    # Dilated map with conv_output's dtype/device: values every `stride`
    # steps, zeros in between.
    feature_map = torch.zeros(
        (bs, out_channel, map_h, map_w),
        dtype=conv_output.dtype,
        device=conv_output.device,
    )
    feature_map[:, :, ::stride, ::stride] = conv_output

    # F.pad order: (left, right, top, bottom). output_padding is added on the
    # bottom/right only, before the convolution.
    feature_map = F.pad(
        feature_map, (p_w, p_w + output_padding, p_h, p_h + output_padding)
    )

    # Flip the kernel spatially and swap its channel axes so that it maps
    # out_channel -> in_channel.
    weight = torch.flip(kernel, [2, 3]).permute(1, 0, 2, 3)

    return F.conv2d(feature_map, weight, bias=bias, stride=1)

# Arguments shared by the hand-written and the reference transposed convolution.
transpose_call_args = dict(stride=2, padding=1, bias=bias)

# Hand-written transposed convolution (ordinary-convolution version).
my_transpose_conv2d_output = transpose_conv_conv(
    pytorch_api_conv_output, kernel, **transpose_call_args
)

# Reference result from the PyTorch API.
pytorch_transpose_conv2d_output = F.conv_transpose2d(
    pytorch_api_conv_output, kernel, **transpose_call_args
)

# Both results must agree up to float32 rounding.
compare_result = torch.allclose(
    my_transpose_conv2d_output,
    pytorch_transpose_conv2d_output,
    rtol=1e-5,
    atol=1e-5,
)
print(compare_result)

True


## 空洞卷积(DilatedConv)

In [9]:
input = torch.randn(4, 3, 5, 5) # input feature map: (batch, in_channel, height, width)
kernel = torch.randn(64, 3, 3, 3) # convolution kernel: (out_channel, in_channel, kernel_h, kernel_w)
bias = torch.randn(64) # one bias value per output channel

def matrix_multiplication_for_conv2d_full_with_dilation(input, kernel, bias=None, stride=1, padding=0, dilation=1):
    """Batched multi-channel dilated 2-D convolution as a single matrix product.

    Same construction as the undilated unrolled-kernel convolution, except
    that the kernel taps are written ``dilation`` elements apart inside each
    window, leaving zeros (the "holes") in between.

    Args:
        input: Tensor of shape (batch, in_channel, height, width).
        kernel: Tensor of shape (out_channel, in_channel, kernel_h, kernel_w).
        bias: Optional tensor of shape (out_channel,); zeros when ``None``.
        stride: Step of the window along both spatial axes.
        padding: Zero padding added on every side of both spatial axes.
        dilation: Spacing between neighbouring kernel taps.

    Returns:
        Tensor of shape (batch, out_channel, output_h, output_w).
    """
    if padding > 0:
        # Only the two trailing (spatial) axes are padded.
        input = F.pad(input, (padding, padding, padding, padding))

    bs, in_channel, input_h, input_w = input.shape
    out_channel, in_channel, kernel_h, kernel_w = kernel.shape
    # Work tensors follow the input's dtype/device; default float32 buffers
    # would make the matmul fail for float64 inputs.
    tensor_opts = {"dtype": input.dtype, "device": input.device}
    if bias is None:
        bias = torch.zeros(out_channel, **tensor_opts)

    # Extent of the kernel once the holes are inserted.
    virtual_kernel_h = (kernel_h - 1) * dilation + 1
    virtual_kernel_w = (kernel_w - 1) * dilation + 1

    output_h = (input_h - virtual_kernel_h) // stride + 1
    output_w = (input_w - virtual_kernel_w) // stride + 1
    num_positions = output_h * output_w

    # Unrolled kernel, kept 5-D while it is filled so that all output
    # channels are written in one assignment per position.
    kernel_matrix = torch.zeros(
        (out_channel, in_channel, input_h, input_w, num_positions), **tensor_opts
    )
    for out_i in range(output_h):
        top = out_i * stride
        rows = slice(top, top + virtual_kernel_h, dilation)
        for out_j in range(output_w):
            left = out_j * stride
            cols = slice(left, left + virtual_kernel_w, dilation)
            # The strided slices select exactly kernel_h x kernel_w taps.
            kernel_matrix[:, :, rows, cols, out_i * output_w + out_j] = kernel
    kernel_matrix = kernel_matrix.reshape(
        (out_channel, in_channel * input_h * input_w, num_positions)
    )

    # (bs, 1, 1, C*H*W) @ (out_channel, C*H*W, P) broadcasts to
    # (bs, out_channel, 1, P); no explicit repeat of either operand is needed.
    input_matrix = input.reshape((bs, 1, 1, -1))
    output = torch.matmul(input_matrix, kernel_matrix)
    output = output.reshape((bs, out_channel, output_h, output_w))

    return output + bias.reshape((1, -1, 1, 1))

# Arguments shared by the hand-written and the reference dilated convolution.
dilated_call_args = dict(bias=bias, padding=2, stride=2, dilation=2)

# Result of the matrix-product convolution that flattens the whole input.
mat_mul_conv_output_full = matrix_multiplication_for_conv2d_full_with_dilation(
    input, kernel, **dilated_call_args
)

# Reference result from the PyTorch API.
pytorch_api_conv_output = F.conv2d(input, kernel, **dilated_call_args)

# Both results must agree up to float32 rounding.
compare_result = torch.allclose(
    mat_mul_conv_output_full,
    pytorch_api_conv_output,
    rtol=1e-5,
    atol=1e-5,
)
print(compare_result)

True


## 群组卷积(GroupConv)

In [11]:
input = torch.randn(4, 4, 5, 5) # input feature map: (batch, in_channel, height, width)
kernel = torch.randn(64, 2, 3, 3) # convolution kernel: (out_channel, in_channel per group, kernel_h, kernel_w)
bias = torch.randn(64) # one bias value per output channel

def matrix_multiplication_for_conv2d_full_with_dilation_and_groups(input, kernel, bias=None, stride=1, padding=0, dilation=1, groups=1):
    """Batched grouped, dilated 2-D convolution as a single matrix product.

    Input and output channels are split into ``groups`` equal parts; the
    output channels of a group only see the input channels of the same group.
    In the unrolled kernel matrix this means each output channel has non-zero
    entries only in the rows belonging to its group's input channels.

    Args:
        input: Tensor of shape (batch, in_channel, height, width).
        kernel: Tensor of shape
            (out_channel, in_channel // groups, kernel_h, kernel_w).
        bias: Optional tensor of shape (out_channel,); zeros when ``None``.
        stride: Step of the window along both spatial axes.
        padding: Zero padding added on every side of both spatial axes.
        dilation: Spacing between neighbouring kernel taps.
        groups: Number of channel groups; must divide both channel counts.

    Returns:
        Tensor of shape (batch, out_channel, output_h, output_w).

    Raises:
        AssertionError: If ``groups`` does not divide both channel counts.
        ValueError: If the kernel's second dimension is not
            ``in_channel // groups``.
    """
    if padding > 0:
        # Only the two trailing (spatial) axes are padded.
        input = F.pad(input, (padding, padding, padding, padding))

    bs, in_channel, input_h, input_w = input.shape
    out_channel, kernel_in_channel, kernel_h, kernel_w = kernel.shape
    # Work tensors follow the input's dtype/device; default float32 buffers
    # would make the matmul fail for float64 inputs.
    tensor_opts = {"dtype": input.dtype, "device": input.device}
    if bias is None:
        bias = torch.zeros(out_channel, **tensor_opts)

    # Extent of the kernel once the holes are inserted.
    virtual_kernel_h = (kernel_h - 1) * dilation + 1
    virtual_kernel_w = (kernel_w - 1) * dilation + 1

    # Channel counts handled by one group.
    assert in_channel % groups == 0 and out_channel % groups == 0, "groups can not mod channel"
    group_in_channel = in_channel // groups
    group_out_channel = out_channel // groups
    if kernel_in_channel != group_in_channel:
        # Without this check a kernel with a single input channel would be
        # broadcast silently over the group's channels.
        raise ValueError(
            f"kernel has {kernel_in_channel} input channels per group, "
            f"expected in_channel // groups = {group_in_channel}"
        )

    output_h = (input_h - virtual_kernel_h) // stride + 1
    output_w = (input_w - virtual_kernel_w) // stride + 1
    num_positions = output_h * output_w

    # Unrolled kernel, kept 5-D while it is filled so that all output
    # channels of a group are written in one assignment per position.
    kernel_matrix = torch.zeros(
        (out_channel, in_channel, input_h, input_w, num_positions), **tensor_opts
    )
    for group in range(groups):
        out_lo = group * group_out_channel
        out_hi = out_lo + group_out_channel
        in_lo = group * group_in_channel
        in_hi = in_lo + group_in_channel
        group_kernel = kernel[out_lo:out_hi]
        for out_i in range(output_h):
            top = out_i * stride
            rows = slice(top, top + virtual_kernel_h, dilation)
            for out_j in range(output_w):
                left = out_j * stride
                cols = slice(left, left + virtual_kernel_w, dilation)
                kernel_matrix[
                    out_lo:out_hi, in_lo:in_hi, rows, cols, out_i * output_w + out_j
                ] = group_kernel
    kernel_matrix = kernel_matrix.reshape(
        (out_channel, in_channel * input_h * input_w, num_positions)
    )

    # (bs, 1, 1, C*H*W) @ (out_channel, C*H*W, P) broadcasts to
    # (bs, out_channel, 1, P); no explicit repeat of either operand is needed.
    input_matrix = input.reshape((bs, 1, 1, -1))
    output = torch.matmul(input_matrix, kernel_matrix)
    output = output.reshape((bs, out_channel, output_h, output_w))

    return output + bias.reshape((1, -1, 1, 1))

# Arguments shared by the hand-written and the reference grouped convolution.
grouped_call_args = dict(bias=bias, padding=2, stride=2, dilation=2, groups=2)

# Result of the matrix-product convolution that flattens the whole input.
mat_mul_conv_output_full = matrix_multiplication_for_conv2d_full_with_dilation_and_groups(
    input, kernel, **grouped_call_args
)

# Reference result from the PyTorch API.
pytorch_api_conv_output = F.conv2d(input, kernel, **grouped_call_args)

# Both results must agree up to float32 rounding.
compare_result = torch.allclose(
    mat_mul_conv_output_full,
    pytorch_api_conv_output,
    rtol=1e-5,
    atol=1e-5,
)
print(compare_result)

True
