In [3]:
import torch
from torch import nn

## 二维卷积计算

In [4]:
def conv2d(x: torch.Tensor, k: torch.Tensor): #@save
    """
        Slide the 2-D kernel ``k`` over the 2-D input ``x`` (stride 1, no padding).

        Every output element is the sum of the element-wise product of ``k``
        with the window of ``x`` whose top-left corner sits at that position.
        The kernel is not flipped, i.e. this is a cross-correlation.

        The result has shape
        ``(x.shape[0] - k.shape[0] + 1, x.shape[1] - k.shape[1] + 1)`` and is
        allocated with ``torch.zeros`` using its default dtype.
    """
    kernel_rows, kernel_cols = k.shape
    out_rows = x.shape[0] - kernel_rows + 1
    out_cols = x.shape[1] - kernel_cols + 1
    y = torch.zeros((out_rows, out_cols))

    for top in range(out_rows):
        # Rows of x covered by the kernel at this vertical offset.
        band = x[top:top + kernel_rows]
        for left in range(out_cols):
            y[top, left] = (band[:, left:left + kernel_cols] * k).sum()

    return y


In [5]:
x = torch.tensor([[0, 1, 2],
                  [3, 4, 5],
                  [6, 7, 8]])
k = torch.tensor([[0, 1],
                  [2, 3]])


In [6]:
y = conv2d(x,k)

In [7]:
y

tensor([[19., 25.],
        [37., 43.]])

## 水平边缘

In [8]:
x = torch.ones((5,6))
x[2:4,:] = 0
x

tensor([[1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1.]])

In [9]:
k = torch.tensor([[1],[-1]])

In [10]:
conv2d(x,k)

tensor([[ 0.,  0.,  0.,  0.,  0.,  0.],
        [ 1.,  1.,  1.,  1.,  1.,  1.],
        [ 0.,  0.,  0.,  0.,  0.,  0.],
        [-1., -1., -1., -1., -1., -1.]])

In [11]:
conv2d(x,k).abs()

tensor([[0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1.]])

## 垂直边缘

In [12]:
x = torch.ones((6,8))
x[:,2:6] = 0
x

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [13]:
k = torch.tensor([[1,-1]])


In [14]:
conv2d(x,k)

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [15]:
conv2d(x,k).abs()

tensor([[0., 1., 0., 0., 0., 1., 0.],
        [0., 1., 0., 0., 0., 1., 0.],
        [0., 1., 0., 0., 0., 1., 0.],
        [0., 1., 0., 0., 0., 1., 0.],
        [0., 1., 0., 0., 0., 1., 0.],
        [0., 1., 0., 0., 0., 1., 0.]])

## 训练-垂直边缘

In [16]:
y = conv2d(x,k)
y


tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [17]:
# Add batch and channel axes: (h, w) -> (1, 1, h, w).
# The target shape is built from the last two dimensions so that re-running
# this cell on an already 4-D tensor is a no-op instead of an error.
y = y.reshape((1, 1) + tuple(y.shape[-2:]))

In [18]:
# Layout expected by nn.Conv2d: (batch_size, channels, h, w).
# The target shape is built from the last two dimensions so that re-running
# this cell on an already 4-D tensor is a no-op instead of an error.
x = x.reshape((1, 1) + tuple(x.shape[-2:]))
x

tensor([[[[1., 1., 0., 0., 0., 0., 1., 1.],
          [1., 1., 0., 0., 0., 0., 1., 1.],
          [1., 1., 0., 0., 0., 0., 1., 1.],
          [1., 1., 0., 0., 0., 0., 1., 1.],
          [1., 1., 0., 0., 0., 0., 1., 1.],
          [1., 1., 0., 0., 0., 0., 1., 1.]]]])

In [19]:
conv2D = nn.Conv2d(1,1,(1,2),bias=False)

In [20]:
lr = 0.03
epoch = 200
loss_func = nn.MSELoss()

In [21]:
for i in range(epoch):
    y_hat = conv2D(x)
    # nn.MSELoss averages the squared error over all elements. Compared with
    # the summed squared error, torch.square(y_hat - y).sum(), it converges
    # more slowly at the same lr, so it needs more epochs or a larger lr.
    loss = loss_func(y_hat,y)
    conv2D.zero_grad()
    loss.backward()
    # Manual SGD step. Updating the parameter in place under no_grad keeps the
    # update out of the autograd graph without going through `.data`, which
    # would bypass autograd's in-place correctness checks.
    with torch.no_grad():
        conv2D.weight -= lr*conv2D.weight.grad

    print(f'--- epoch {i} loss {loss}---')

--- epoch 0 loss 0.8351865410804749---
--- epoch 1 loss 0.7853633165359497---
--- epoch 2 loss 0.7393738627433777---
--- epoch 3 loss 0.6969024538993835---
--- epoch 4 loss 0.6576598286628723---
--- epoch 5 loss 0.6213808655738831---
--- epoch 6 loss 0.5878223776817322---
--- epoch 7 loss 0.5567620396614075---
--- epoch 8 loss 0.5279955267906189---
--- epoch 9 loss 0.5013357400894165---
--- epoch 10 loss 0.4766111969947815---
--- epoch 11 loss 0.45366472005844116---
--- epoch 12 loss 0.43235230445861816---
--- epoch 13 loss 0.41254180669784546---
--- epoch 14 loss 0.3941120505332947---
--- epoch 15 loss 0.37695202231407166---
--- epoch 16 loss 0.3609600067138672---
--- epoch 17 loss 0.3460427522659302---
--- epoch 18 loss 0.33211439847946167---
--- epoch 19 loss 0.3190966844558716---
--- epoch 20 loss 0.30691760778427124---
--- epoch 21 loss 0.2955110967159271---
--- epoch 22 loss 0.28481659293174744---
--- epoch 23 loss 0.2747785449028015---
--- epoch 24 loss 0.26534605026245117---
--

In [22]:
conv2D.weight

Parameter containing:
tensor([[[[ 0.8161, -0.8159]]]], requires_grad=True)

## 填充

In [23]:
x = torch.rand((8,8))
x.shape

torch.Size([8, 8])

In [24]:
# Add batch and channel axes: (h, w) -> (1, 1, h, w).
# Only the last two dimensions are used, so re-running this cell does not
# keep prepending extra (1, 1) axes.
x = x.reshape((1, 1) + tuple(x.shape[-2:]))
x.shape

torch.Size([1, 1, 8, 8])

In [25]:
conv2D = nn.Conv2d(1,1,(5,5))
y = conv2D(x)
y.shape


torch.Size([1, 1, 4, 4])

In [26]:
conv2D = nn.Conv2d(1,1,(5,5),padding=(2,2))
y = conv2D(x)
y.shape


torch.Size([1, 1, 8, 8])

In [27]:
conv2D = nn.Conv2d(1,1,(3,5))
y = conv2D(x)
y.shape

torch.Size([1, 1, 6, 4])

In [28]:
conv2D = nn.Conv2d(1,1,(3,5),padding=(1,2))
y = conv2D(x)
y.shape

torch.Size([1, 1, 8, 8])

## 多输入通道

每个channel先单独计算卷积, 然后求和

输入是两个通道, 输出仍是一个通道

In [29]:
x = torch.tensor([
    [[1, 2, 3],
     [4, 5, 6],
     [7, 8, 9]],
    [[0, 1, 2],
     [3, 4, 5],
     [6, 7, 8]]])

k = torch.tensor(
    [[[1,2],
      [3,4]],
     [[0,1],
      [2,3]]])


In [30]:
x.shape,k.shape

(torch.Size([2, 3, 3]), torch.Size([2, 2, 2]))

In [31]:
def conv2d_multi_in(x, w):
    """Multi-input-channel 2-D cross-correlation with a single output map.

    ``x`` is indexed as ``(channel, row, col)`` and ``w`` as
    ``(channel, kernel_row, kernel_col)``. Each channel of ``x`` is
    cross-correlated with the matching channel of ``w`` (stride 1, no
    padding), and the per-channel maps are then added together.

    Returns a tensor of shape
    ``(x.shape[1] - w.shape[1] + 1, x.shape[2] - w.shape[2] + 1)``.
    """
    n_channels = x.shape[0]
    kernel_h, kernel_w = w.shape[1], w.shape[2]
    out_h = x.shape[1] - kernel_h + 1
    out_w = x.shape[2] - kernel_w + 1

    # Step 1: one cross-correlation map per input channel.
    per_channel = torch.zeros((n_channels, out_h, out_w))
    for c in range(n_channels):
        for r in range(out_h):
            for s in range(out_w):
                window = x[c, r:r + kernel_h, s:s + kernel_w]
                per_channel[c, r, s] = (window * w[c, :, :]).sum()

    # Step 2: accumulate the channel maps, in channel order, into one output.
    total = torch.zeros((out_h, out_w))
    for channel_map in per_channel:
        total += channel_map

    return total


In [32]:
conv2d_multi_in(x,k)

tensor([[ 56.,  72.],
        [104., 120.]])

## 多输出通道

$$
c_o \times c_i \times k_h \times k_w
$$

卷积核张量的形状：输出通道数 $c_o$ × 输入通道数 $c_i$ × 卷积核高 $k_h$ × 卷积核宽 $k_w$



In [33]:
k.shape

torch.Size([2, 2, 2])

In [34]:
k = torch.stack([k,k+1,k+2])
k.shape

torch.Size([3, 2, 2, 2])

In [35]:
k

tensor([[[[1, 2],
          [3, 4]],

         [[0, 1],
          [2, 3]]],


        [[[2, 3],
          [4, 5]],

         [[1, 2],
          [3, 4]]],


        [[[3, 4],
          [5, 6]],

         [[2, 3],
          [4, 5]]]])

In [36]:
def conv2d_multi_in_out(x,k):
    """Multi-input, multi-output-channel 2-D cross-correlation.

    Iterating over ``k`` along its first axis yields one kernel stack per
    output channel. Each stack is passed, together with ``x``, to
    ``conv2d_multi_in``, and the resulting maps are stacked along a new
    leading axis.
    """
    output_maps = []
    for kernel_stack in k:
        output_maps.append(conv2d_multi_in(x, kernel_stack))
    return torch.stack(output_maps)

In [37]:
conv2d_multi_in_out(x,k)

tensor([[[ 56.,  72.],
         [104., 120.]],

        [[ 76., 100.],
         [148., 172.]],

        [[ 96., 128.],
         [192., 224.]]])

## 汇聚层

最大汇聚层, 将某个形状窗口中的最大值作为结果

平均汇聚层, 将某个形状窗口中的平均值作为结果


In [59]:
def pool2d(x,pool_size,mode='max'):
    """2-D pooling over a single 2-D tensor with stride 1 and no padding.

    Args:
        x: 2-D tensor indexed as (row, col).
        pool_size: pair ``(pool_h, pool_w)`` giving the window height and width.
        mode: ``'max'`` selects max pooling; any other value selects average
            pooling (the window mean).

    Returns:
        Tensor of shape ``(x.shape[0] - pool_h + 1, x.shape[1] - pool_w + 1)``
        where each element is the max or mean of the corresponding window.
    """
    pool_h, pool_w = pool_size[0], pool_size[1]
    # The output width must come from the input's column count, x.shape[1].
    # Using x.shape[0] for both axes is only correct for square inputs.
    y = torch.ones((x.shape[0]-pool_h+1, x.shape[1]-pool_w+1))
    use_max = mode == 'max'
    for i in range(y.shape[0]):
        for j in range(y.shape[1]):
            window = x[i:i+pool_h, j:j+pool_w]
            y[i,j] = window.max() if use_max else window.mean()

    return y


In [60]:
x = torch.tensor([
    [0,1,2,],
    [3,4,5,],
    [6,7,8,],
    ],dtype=float)

In [61]:
pool2d(x,(2,2),'max')

tensor([[4., 5.],
        [7., 8.]])

In [62]:
pool2d(x,(2,2),'avg')


tensor([[2., 3.],
        [5., 6.]])