尝试自己实现 Conv2d，支持多通道输入和输出。

网上已经有人给出相应的实现：https://discuss.pytorch.org/t/how-was-conv2d-implemented-in-pytorch/35223/2

In [5]:
import torch
from torch import nn

# Start by inspecting how Conv2d lays out its parameters:
# 3 input channels, 2 output channels, and a 4x5 kernel.
conv2d = nn.Conv2d(in_channels=3, out_channels=2, kernel_size=(4, 5))

# Print each parameter's name together with its shape.
for name, param in conv2d.named_parameters():
    print(name, param.size())

weight torch.Size([2, 3, 4, 5])
bias torch.Size([2])


In [21]:
# Input layout: batch x channels x height x width
X = torch.randn((1, 3, 10, 12))

# Per the parameter shapes printed above, conv2d.weight is laid out as
# [out_channels, in_channels, kernel_height, kernel_width].
w = nn.Parameter(torch.randn((2, 3, 4, 5)))

# Replace the layer's parameters with our own weight and an all-zero bias,
# then run the reference convolution.
conv2d.weight = w
conv2d.bias = nn.Parameter(torch.zeros(2))
Y = conv2d(X)
print(Y.size())
print(Y)

torch.Size([1, 2, 7, 8])
tensor([[[[ 4.0505e+00,  5.0768e+00,  1.1193e+01,  9.3357e+00,  3.5201e+00,
           -2.8899e+00,  1.4229e+01,  8.7867e+00],
          [-1.1399e+00, -2.2456e+00, -3.3975e+00, -4.6363e+00,  8.5825e+00,
           -9.6571e+00,  3.9503e+00,  6.7593e+00],
          [ 1.0994e+01, -9.8126e+00,  3.2591e+00,  8.9945e+00,  1.6969e+01,
           -1.2465e+01,  5.9345e+00,  4.1210e-01],
          [-5.0153e+00, -4.2659e+00, -4.0778e+00, -1.4153e+00, -1.4689e+00,
            8.3639e+00, -4.6947e+00,  5.1888e+00],
          [-1.0983e+00, -4.7729e+00, -1.3250e+00, -1.1528e+01, -8.8684e+00,
            4.0537e+00,  3.2212e+00,  4.2422e+00],
          [ 1.3402e+00, -2.8066e+00, -4.1240e-01,  5.5571e+00,  3.6924e+00,
           -3.3545e+00, -6.2308e+00,  1.2915e+01],
          [-1.8232e+00,  1.3599e+01,  1.2224e+01,  3.6629e+00,  4.4683e+00,
            8.0019e+00,  5.2227e+00, -4.4041e+00]],

         [[ 5.0802e+00,  3.6428e+00, -1.7184e+00, -1.0121e-01,  1.1512e+01,
        

In [23]:
# Now try a hand-written implementation.
# Extract every 4x5 sliding block of X as a column.
X_unf = nn.functional.unfold(X, kernel_size=(4, 5))
print(X_unf.size())
# Output shape: (N, C * prod(kernel_size), prod(out_img_size)).
# Dim 1 is the number of values in each block and dim 2 is the number of
# blocks; every block is kernel_size in size (4*5 here), and the number of
# blocks equals the number of output positions.

torch.Size([1, 60, 56])


In [30]:
# Convolution as a matrix product over the unfolded blocks:
# [1 * 56 * 60] * [60 * 2] = [1 * 56 * 2] -> transpose -> [1 * 2 * 56]
Y_unf = X_unf.transpose(1, 2).matmul(w.view(w.size(0), -1).t()).transpose(1, 2)
# Fold with a 1x1 kernel to lay the 56 columns back out on the 7x8 output grid.
Y1 = nn.functional.fold(Y_unf, (7, 8), (1, 1))
# A plain reshape works just as well:
# Y1 = Y_unf.reshape(1, 2, 7, 8)

# Compare element-wise: take abs() before reducing so that positive and
# negative errors cannot cancel each other out, as they could in a signed sum.
assert (Y - Y1).abs().max() < 1e-4

In [31]:
# Cross-check against the functional API. conv2d requires the weight as its
# second argument; no bias is passed because the layer's bias was set to zeros.
print((torch.nn.functional.conv2d(X, w) - Y1).abs().max())

tensor(3.8147e-06, grad_fn=<MaxBackward1>)
