In [33]:
import math
import torch.nn as nn
import torch
import torch.nn.functional as F

In [34]:
# Instantiate a 2-D convolution layer and run it on a random input.
in_channel = 1
out_channel = 1
kernel_size = 3
bias_flag = False
height = 4
# NOTE(review): `weight` is used as the input *width* in the shape list below;
# the name looks like a typo for `width`.
weight = 4
batch_size = 1
input_size1 = [batch_size, in_channel, height, weight] # shape list: [1, 1, 4, 4]

# Layer without a bias term (bias_flag is False).
conv_layer = torch.nn.Conv2d(in_channel, out_channel, kernel_size, bias=bias_flag)
conv_input = torch.randn(input_size1)
print(conv_input, "# conv_input")
conv_output = conv_layer(conv_input)
print(conv_output, "# conv_output")
# The layer's weight is 4-D; the printed label spells out the axis order.
print(conv_layer.weight.shape, "# [out_ch, in_ch, h, w]")

tensor([[[[ 0.4749,  0.2363,  0.6269,  0.9417],
          [-0.5951, -1.9033, -1.2701,  0.2512],
          [ 0.5078,  0.7907,  0.0940,  1.1174],
          [ 0.1305, -0.7959, -0.6058,  0.8090]]]]) # conv_input
tensor([[[[-0.3745, -0.7408],
          [-0.2931, -0.7413]]]], grad_fn=<ConvolutionBackward0>) # conv_output
torch.Size([1, 1, 3, 3]) # [out_ch, in_ch, h, w]


In [35]:
# Same convolution through the functional API: pass the input together with
# the weight tensor taken from `conv_layer`; no bias argument is given.
conv_F_output = F.conv2d(conv_input, conv_layer.weight)
print(conv_F_output, "# conv_F_output")

tensor([[[[-0.3745, -0.7408],
          [-0.2931, -0.7413]]]], grad_fn=<ConvolutionBackward0>) # conv_F_output


In [36]:
# 2-D convolution implemented with plain tensor operations.
# Fixtures: an all-ones 5x5 input, a 3x3 kernel filled with 0.5,
# and a single random bias value.
tmp_input = torch.ones(5, 5)
tmp_kernel = torch.ones(3, 3) / 2
tmp_bias = torch.randn(1)
def matrix_multiplication_for_conv2d(input_tensor: torch.Tensor,
                                     kernel: torch.Tensor, stride=1,
                                     padding=0, bias=0):
    """
    Sliding-window 2-D convolution on a single 2-D map.

    The batch and channel dimensions are not handled: both ``input_tensor``
    and ``kernel`` are unpacked as two-dimensional tensors.

    Args:
        input_tensor: 2-D input map.
        kernel: 2-D kernel.
        stride: step between consecutive windows, used for both directions.
        padding: number of zero rows/columns added on every side when > 0.
        bias: value added to every output element.

    Returns:
        A tensor created with ``torch.zeros`` holding one value per window.
    """
    if padding > 0:
        # Same amount of zero padding on left, right, top and bottom.
        input_tensor = F.pad(input_tensor, (padding,) * 4)

    in_rows, in_cols = input_tensor.shape
    k_rows, k_cols = kernel.shape
    last_top = in_rows - k_rows
    last_left = in_cols - k_cols

    result = torch.zeros(last_top // stride + 1, last_left // stride + 1)

    # The position of a window start inside its range is the output index.
    for out_r, top in enumerate(range(0, last_top + 1, stride)):
        for out_c, left in enumerate(range(0, last_left + 1, stride)):
            window = input_tensor[top:top + k_rows, left:left + k_cols]
            # Element-wise product of the window with the kernel, summed up.
            result[out_r, out_c] = (window * kernel).sum() + bias
    return result


# Hand-written convolution on the 2-D fixtures.
res1 = matrix_multiplication_for_conv2d(tmp_input, tmp_kernel, padding=1,
                                        bias=tmp_bias)

# Reference result: F.conv2d is called with the same data reshaped to 4-D
# (two leading axes of size 1 are added to both input and kernel).
res2 = F.conv2d(tmp_input.reshape(1,1,tmp_input.shape[0], tmp_input.shape[1]),
                tmp_kernel.reshape(1,1,tmp_kernel.shape[0], tmp_kernel.shape[1]),
                padding=1, bias=tmp_bias)
print(res1)
print(res2)

tensor([[1.0155, 2.0155, 2.0155, 2.0155, 1.0155],
        [2.0155, 3.5155, 3.5155, 3.5155, 2.0155],
        [2.0155, 3.5155, 3.5155, 3.5155, 2.0155],
        [2.0155, 3.5155, 3.5155, 3.5155, 2.0155],
        [1.0155, 2.0155, 2.0155, 2.0155, 1.0155]])
tensor([[[[1.0155, 2.0155, 2.0155, 2.0155, 1.0155],
          [2.0155, 3.5155, 3.5155, 3.5155, 2.0155],
          [2.0155, 3.5155, 3.5155, 3.5155, 2.0155],
          [2.0155, 3.5155, 3.5155, 3.5155, 2.0155],
          [1.0155, 2.0155, 2.0155, 2.0155, 1.0155]]]])


In [119]:
# 2-D convolution implemented with plain tensor operations (flattened variant).
# Fixtures are re-created here: all-ones 5x5 input, 3x3 kernel of 0.5,
# and a fresh random bias value.
tmp_input = torch.ones(5, 5)
tmp_kernel = torch.ones(3, 3) / 2
tmp_bias = torch.randn(1)
def matrix_multiplication_for_conv2d_flatten(input_tensor: torch.Tensor,
                                     kernel: torch.Tensor, stride=1,
                                     padding=0, bias=0):
    """
    2-D convolution on a single 2-D map, computed as one matrix product.

    Batch and channel dimensions are not handled. Every window of the
    (optionally padded) input is flattened into one row of a matrix, the
    kernel is reshaped into a column vector, and their product gives all
    output values at once.

    Args:
        input_tensor: 2-D input map.
        kernel: 2-D kernel.
        stride: step between consecutive windows, used for both directions.
        padding: number of zero rows/columns added on every side when > 0.
        bias: value added to every output element.

    Returns:
        Tensor of shape (output_h, output_w).
    """
    if padding > 0:
        input_tensor = F.pad(input_tensor, (padding,) * 4)

    in_rows, in_cols = input_tensor.shape
    k_rows, k_cols = kernel.shape
    out_rows = (in_rows - k_rows) // stride + 1
    out_cols = (in_cols - k_cols) // stride + 1

    # One row per window, one column per kernel element.
    windows = torch.zeros(out_rows * out_cols, kernel.numel())
    # Kernel as a column vector so the product yields one value per window.
    kernel_column = kernel.reshape(-1, 1)

    for out_r, top in enumerate(range(0, in_rows - k_rows + 1, stride)):
        for out_c, left in enumerate(range(0, in_cols - k_cols + 1, stride)):
            patch = input_tensor[top:top + k_rows, left:left + k_cols]
            # Rows are filled in row-major order of the output positions.
            windows[out_r * out_cols + out_c] = patch.reshape(-1)

    flat_result = torch.matmul(windows, kernel_column)
    return flat_result.reshape((out_rows, out_cols)) + bias



# Flattened (matrix-product) convolution on the 2-D fixtures.
res1 = matrix_multiplication_for_conv2d_flatten(tmp_input, tmp_kernel, padding=1,
                                        bias=tmp_bias)

# Reference result from F.conv2d on the same data reshaped to 4-D.
res2 = F.conv2d(tmp_input.reshape(1,1,tmp_input.shape[0], tmp_input.shape[1]),
                tmp_kernel.reshape(1,1,tmp_kernel.shape[0], tmp_kernel.shape[1]),
                padding=1, bias=tmp_bias)
# res1 is 2-D and res2 is 4-D; torch.allclose compares them with broadcasting.
flag = torch.allclose(res1, res2)
print(flag)
print(res1)
print(res2)

True
tensor([[2.7592, 3.7592, 3.7592, 3.7592, 2.7592],
        [3.7592, 5.2592, 5.2592, 5.2592, 3.7592],
        [3.7592, 5.2592, 5.2592, 5.2592, 3.7592],
        [3.7592, 5.2592, 5.2592, 5.2592, 3.7592],
        [2.7592, 3.7592, 3.7592, 3.7592, 2.7592]])
tensor([[[[2.7592, 3.7592, 3.7592, 3.7592, 2.7592],
          [3.7592, 5.2592, 5.2592, 5.2592, 3.7592],
          [3.7592, 5.2592, 5.2592, 5.2592, 3.7592],
          [3.7592, 5.2592, 5.2592, 5.2592, 3.7592],
          [2.7592, 3.7592, 3.7592, 3.7592, 2.7592]]]])


In [57]:
# 2-D convolution implemented with plain tensor operations,
# now including the batch and channel dimensions.
batch_size = 2
in_channel = 2
inh = 5
inw = 5
out_channel = 3
kh = 3
kw = 3
# Input is (batch, in_channel, h, w); kernel is (out_channel, in_channel, kh, kw).
tmp_input = torch.ones(batch_size, in_channel, inh, inw)
tmp_kernel = torch.ones(out_channel, in_channel, kh, kw) / 2
# The bias is one scalar per output channel, so it has out_channel entries.
tmp_bias = torch.randn(out_channel)
def matrix_multiplication_for_conv2d_full(input_tensor: torch.Tensor,
                                     kernel: torch.Tensor, stride=1,
                                     padding=0, bias=0):
    """
    Naive 2-D convolution that handles the batch and channel dimensions.

    Both ``input_tensor`` and ``kernel`` are 4-D.

    Args:
        input_tensor: tensor laid out as (batch_size, in_channel, h, w).
        kernel: tensor laid out as (out_channel, in_channel, kernel_h, kernel_w).
        stride: step between consecutive windows, used for both directions.
        padding: number of zero rows/columns added on every spatial side
            when > 0; the batch and channel axes are never padded.
        bias: ``None`` (no bias), a scalar, or a tensor that can be expanded
            to ``(out_channel,)``. Entry ``oc`` is added to every element of
            output channel ``oc``.

    Returns:
        Tensor created with ``torch.zeros`` of shape
        (batch_size, out_channel, output_h, output_w).

    Raises:
        ValueError: if the channel count of ``input_tensor`` differs from the
            in-channel count of ``kernel``.
    """
    if padding > 0:
        # Pad pairs run from the last axis backwards: (w, w, h, h, ch, ch, batch, batch).
        # The batch and channel axes get no padding.
        input_tensor = F.pad(input_tensor, (padding, padding, padding, padding, 0, 0, 0, 0))

    # input shape: batch_size, in_channel, h, w
    bs, in_ch, input_h, input_w = input_tensor.shape
    # kernel shape: out_channel, in_channel, kernel_h, kernel_w
    out_ch, kernel_in_ch, kernel_h, kernel_w = kernel.shape
    if in_ch != kernel_in_ch:
        raise ValueError(
            f"input has {in_ch} channels but kernel expects {kernel_in_ch}")

    # Normalise the bias to one entry per output channel. The size comes from
    # the kernel itself, not from any variable outside this function, and a
    # scalar (including the default 0) is broadcast to every channel.
    if bias is None:
        bias = torch.zeros(out_ch)
    else:
        bias = torch.as_tensor(bias).expand(out_ch)

    output_h = (input_h - kernel_h) // stride + 1
    output_w = (input_w - kernel_w) // stride + 1
    output = torch.zeros(bs, out_ch, output_h, output_w)

    # Five nested loops: batch, out_channel, in_channel, window row, window column.
    for ind in range(bs):
        for oc in range(out_ch):
            for ic in range(in_ch):
                for out_i, i in enumerate(range(0, input_h - kernel_h + 1, stride)):
                    for out_j, j in enumerate(range(0, input_w - kernel_w + 1, stride)):
                        # Window of the current input channel.
                        region = input_tensor[ind, ic, i:i + kernel_h, j:j + kernel_w]
                        # An output channel is the sum over all input channels,
                        # hence "+=", using the (oc, ic) slice of the kernel.
                        output[ind, oc, out_i, out_j] += torch.sum(region * kernel[oc, ic])
            # The bias is added once per output channel.
            output[ind, oc] += bias[oc]
    return output


# Hand-written 4-D convolution with padding 1 and stride 2.
res1 = matrix_multiplication_for_conv2d_full(tmp_input, tmp_kernel, padding=1,
                                        bias=tmp_bias, stride=2)

# Reference result from F.conv2d with the same arguments.
res2 = F.conv2d(tmp_input, tmp_kernel,
                padding=1, bias=tmp_bias, stride=2)
flag = torch.allclose(res1, res2)
print(flag)
print(res1)
print(res2)

True
tensor([[[[3.0971, 5.0971, 3.0971],
          [5.0971, 8.0971, 5.0971],
          [3.0971, 5.0971, 3.0971]],

         [[4.0109, 6.0109, 4.0109],
          [6.0109, 9.0109, 6.0109],
          [4.0109, 6.0109, 4.0109]],

         [[2.4993, 4.4993, 2.4993],
          [4.4993, 7.4993, 4.4993],
          [2.4993, 4.4993, 2.4993]]],


        [[[3.0971, 5.0971, 3.0971],
          [5.0971, 8.0971, 5.0971],
          [3.0971, 5.0971, 3.0971]],

         [[4.0109, 6.0109, 4.0109],
          [6.0109, 9.0109, 6.0109],
          [4.0109, 6.0109, 4.0109]],

         [[2.4993, 4.4993, 2.4993],
          [4.4993, 7.4993, 4.4993],
          [2.4993, 4.4993, 2.4993]]]])
tensor([[[[3.0971, 5.0971, 3.0971],
          [5.0971, 8.0971, 5.0971],
          [3.0971, 5.0971, 3.0971]],

         [[4.0109, 6.0109, 4.0109],
          [6.0109, 9.0109, 6.0109],
          [4.0109, 6.0109, 4.0109]],

         [[2.4993, 4.4993, 2.4993],
          [4.4993, 7.4993, 4.4993],
          [2.4993, 4.4993, 2.4993]]],




In [122]:
# nn.Unfold usage.
kernel_h = 2
kernel_w = 3
in_channel = 5
in_h = 3
in_w = 4
batch_size = 2
out_channel = 1
unfold = nn.Unfold(kernel_size=(kernel_h, kernel_w))
# NOTE(review): `input` shadows the Python builtin of the same name at
# notebook scope.
input = torch.randn(batch_size, in_channel, in_h, in_w)
output = unfold(input)
# The recorded run printed torch.Size([2, 30, 4]):
# 2 = batch_size, 30 = in_channel * kernel_h * kernel_w (5 * 2 * 3),
# 4 = number of window positions, (3 - 2 + 1) * (4 - 3 + 1).
print(output.size(), "# ouput size")


torch.Size([2, 30, 4]) # ouput size


In [146]:
# Expand a kernel into its matrix form
def get_kernel_matrix(kernel, input_size):
    """Build the matrix whose product with the flattened input is the convolution.

    Batch, channel and padding are not considered, and stride is assumed
    to be 1. For every window position the kernel is zero-padded up to the
    input size (e.g. a 3x3 kernel placed inside a 5x5 map), flattened into a
    vector, and stored as one row of the result.

    Args:
        kernel: 2-D kernel tensor.
        input_size: either an object with a 2-element ``.shape`` (such as the
            2-D input tensor itself) or a plain ``(height, width)`` pair.

    Returns:
        Tensor of shape (number of window positions, input_h * input_w).
        Multiplying it with the input reshaped to a column vector gives the
        convolution output as a column vector.

    Raises:
        ValueError: if the kernel is larger than the input in either direction.
    """
    kernel_h, kernel_w = kernel.shape
    # Accept a tensor-like object (use its shape) or a bare (h, w) size.
    input_h, input_w = getattr(input_size, "shape", input_size)
    if kernel_h > input_h or kernel_w > input_w:
        raise ValueError(
            f"kernel {kernel_h}x{kernel_w} does not fit into input {input_h}x{input_w}")

    positions_h = input_h - kernel_h + 1
    positions_w = input_w - kernel_w + 1
    # One row per window position, one column per input element.
    result = torch.zeros((positions_h * positions_w, input_h * input_w))

    count = 0
    for i in range(positions_h):
        for j in range(positions_w):
            # F.pad order is (left, right, top, bottom): shift the kernel to
            # offset (i, j) and fill with zeros up to the input size.
            padded_kernel = F.pad(kernel, [j, input_w - kernel_w - j, i, input_h - kernel_h - i])
            # The flattened padded kernel becomes one row of the result.
            result[count] = padded_kernel.flatten()
            count += 1
    return result

In [147]:
def test_get_kernel_matrix():
    """Check that the kernel-matrix product reproduces F.conv2d.

    A random 3x3 kernel and a random 4x4 map are convolved twice: once as
    ``kernel_matrix @ flattened_input`` and once with ``F.conv2d``. Both
    results are printed and then compared.

    Raises:
        AssertionError: if the two results differ beyond float tolerance.
    """
    kernel = torch.randn(3, 3)
    feature_map = torch.randn(4, 4)
    kernel_matrix = get_kernel_matrix(kernel, feature_map)
    # Convolution as a matrix product with the input as a column vector.
    mm_conv2d_output = kernel_matrix @ feature_map.reshape((-1, 1))
    # Reference: F.conv2d on the same data with two leading axes of size 1.
    pytorch_conv2d_output = F.conv2d(feature_map.unsqueeze(0).unsqueeze(0),
                                     kernel.unsqueeze(0).unsqueeze(0))
    print(mm_conv2d_output, "\n", pytorch_conv2d_output)
    # Printing alone verifies nothing; compare the values explicitly.
    assert torch.allclose(mm_conv2d_output.reshape(pytorch_conv2d_output.shape),
                          pytorch_conv2d_output, rtol=1e-4, atol=1e-5), \
        "kernel-matrix convolution does not match F.conv2d"

# Run the comparison; it prints the matrix-product result and the F.conv2d result.
test_get_kernel_matrix()



tensor([[-1.2622],
        [ 5.0887],
        [-0.3410],
        [ 3.7604]]) 
 tensor([[[[-1.2622,  5.0887],
          [-0.3410,  3.7604]]]])


In [1]:
def test_transpose_conv2d_demo():
    """Show that transposed convolution is multiplication by the transposed kernel matrix.

    A 4x4 map is convolved with a 3x3 kernel, then the result is upsampled
    back to the input's shape in two ways: ``kernel_matrix^T @ conv_output``
    and ``F.conv_transpose2d``. Both results are printed and then compared.

    Raises:
        AssertionError: if the two results differ beyond float tolerance.
    """
    # Transposed convolution used for upsampling.
    kernel = torch.randn(3, 3)
    feature_map = torch.randn(4, 4)
    kernel_matrix = get_kernel_matrix(kernel, feature_map)
    mm_conv2d_output = kernel_matrix @ feature_map.reshape((-1, 1))
    pytorch_conv2d_output = F.conv2d(feature_map.unsqueeze(0).unsqueeze(0),
                                     kernel.unsqueeze(0).unsqueeze(0))
    # [16, 4] @ [4, 1]
    mm_transposed_conv2d_output = kernel_matrix.transpose(-1, -2) @ mm_conv2d_output
    py_transpose_conv2d_output = F.conv_transpose2d(pytorch_conv2d_output,
                                                    kernel.unsqueeze(0).unsqueeze(0))
    # Reshape to the input's own shape instead of a hard-coded (4, 4).
    mm_upsampled = mm_transposed_conv2d_output.reshape(feature_map.shape)
    print(mm_upsampled)
    print(py_transpose_conv2d_output)
    # Printing alone verifies nothing; compare the values explicitly.
    assert torch.allclose(mm_upsampled,
                          py_transpose_conv2d_output.reshape(feature_map.shape),
                          rtol=1e-4, atol=1e-4), \
        "transposed kernel-matrix product does not match F.conv_transpose2d"


# NOTE(review): the recorded run of this cell ended with
# "NameError: name 'torch' is not defined", i.e. it was executed in a kernel
# where the import cell had not been run. Run the import cell first.
test_transpose_conv2d_demo()

NameError: name 'torch' is not defined