# 卷积神经网络

## 二维互相关运算

In [1]:
from mxnet import autograd as ag
from mxnet import nd
from mxnet.gluon import nn

In [28]:
def corr2d(X, K):
    """Compute the 2D cross-correlation of ``X`` with the kernel ``K``.

    The kernel is slid over every position at which it fits entirely inside
    ``X``. At each position the window and the kernel are multiplied
    element-wise and summed into one output entry. Every entry is printed
    right after it is written.

    Args:
        X: 2D input array.
        K: 2D kernel array.

    Returns:
        Array of shape ``(X.shape[0] - h + 1, X.shape[1] - w + 1)``, where
        ``(h, w)`` is the shape of ``K``.
    """
    k_rows, k_cols = K.shape
    out_rows = X.shape[0] - k_rows + 1
    out_cols = X.shape[1] - k_cols + 1
    Y = nd.zeros((out_rows, out_cols))
    for r in range(out_rows):
        row_stop = r + k_rows
        for c in range(out_cols):
            window = X[r:row_stop, c:c + k_cols]
            Y[r, c] = (window * K).sum()
            print(Y[r, c])
    return Y

In [29]:
# Worked example: cross-correlate a 3x3 input with a 2x2 kernel, which
# yields a 2x2 result.
X = nd.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
K = nd.array([[0, 1], [2, 3]])
corr2d(X, K)


[19.]
<NDArray 1 @cpu(0)>

[25.]
<NDArray 1 @cpu(0)>

[37.]
<NDArray 1 @cpu(0)>

[43.]
<NDArray 1 @cpu(0)>



[[19. 25.]
 [37. 43.]]
<NDArray 2x2 @cpu(0)>

## 二维卷积层

In [6]:
class Conv2d(nn.Block):
    """2D convolution layer implemented on top of ``corr2d``.

    The layer owns a ``weight`` parameter whose shape is ``kernel_size`` and
    a ``bias`` parameter of shape ``(1,)``.
    """

    def __init__(self, kernel_size, **kwargs):
        """Register the ``weight`` and ``bias`` parameters.

        Args:
            kernel_size: Shape used for the ``weight`` parameter.
            **kwargs: Forwarded unchanged to ``nn.Block.__init__``.
        """
        super().__init__(**kwargs)
        params = self.params
        self.weight = params.get('weight', shape=kernel_size)
        self.bias = params.get('bias', shape=(1,))

    def forward(self, x):
        """Return ``corr2d(x, weight)`` with the bias added."""
        correlated = corr2d(x, self.weight.data())
        return correlated + self.bias.data()

## 通过数据学习核数组

In [65]:
from mxnet import nd
from mxnet import autograd as ag
from mxnet.gluon import nn
from mxnet import gluon

# Gluon 2D convolution layer with 1 output channel and a (1, 2) kernel; its
# weight is what the loop below fits.
conv2d=nn.Conv2D(1,kernel_size=(1,2))
conv2d.initialize()

# Earlier attempt that fed a 4D array to corr2d, kept for reference:
##X=nd.ones((1,1,6,8))
##X[:,:,:,2:6]=0
##K=nd.array([[1,-1]])
##Y=corr2d(X,K)
## corr2d only works on 2D arrays
# Build a 6x8 input of ones with column 4 zeroed, then produce the target Y
# by cross-correlating it with the known kernel [1, -1].
X=nd.ones((6,8))
X[:,4:5]=0
K=nd.array([[1,-1]])
Y=corr2d(X,K)
print(X,K,Y)
# Reshape input and target to 4D; nn.Conv2D takes
# (batch, channel, height, width) input by default.
X=X.reshape((1,1,6,8))
Y=Y.reshape((1,1,6,7))

# Ten iterations of hand-written gradient descent.
for i in range(10):
    with ag.record():
        Y_hat=conv2d(X)
        # Element-wise squared error between target and prediction.
        l=(Y-Y_hat)**2
    l.backward()
    # Update the weight in place with step size 16e-3; the bias is not updated.
    conv2d.weight.data()[:]-=16e-3 * conv2d.weight.grad()
    print('NO.%d, loss:%.3f'%(i+1,l.sum().asscalar()))


[[1. 1. 1. 1. 0. 1. 1. 1.]
 [1. 1. 1. 1. 0. 1. 1. 1.]
 [1. 1. 1. 1. 0. 1. 1. 1.]
 [1. 1. 1. 1. 0. 1. 1. 1.]
 [1. 1. 1. 1. 0. 1. 1. 1.]
 [1. 1. 1. 1. 0. 1. 1. 1.]]
<NDArray 6x8 @cpu(0)> 
[[ 1. -1.]]
<NDArray 1x2 @cpu(0)> 
[[ 0.  0.  0.  1. -1.  0.  0.]
 [ 0.  0.  0.  1. -1.  0.  0.]
 [ 0.  0.  0.  1. -1.  0.  0.]
 [ 0.  0.  0.  1. -1.  0.  0.]
 [ 0.  0.  0.  1. -1.  0.  0.]
 [ 0.  0.  0.  1. -1.  0.  0.]]
<NDArray 6x7 @cpu(0)>
NO.1, loss:12.500
NO.2, loss:8.163
NO.3, loss:5.332
NO.4, loss:3.485
NO.5, loss:2.279
NO.6, loss:1.493
NO.7, loss:0.981
NO.8, loss:0.649
NO.9, loss:0.433
NO.10, loss:0.295


In [66]:
# Show the learned weight reshaped to (1, 2) so it can be compared with the
# kernel [[1, -1]] that generated the target.
print(conv2d.weight.data().reshape((1,2)))


[[ 0.8634343 -0.8944946]]
<NDArray 1x2 @cpu(0)>


## 填充与步长

### 填充

In [8]:
from mxnet import nd
from mxnet.gluon import nn

def comp_conv2d(conv2d, X):
    """Run a convolution layer on a 2D array and return the trailing axes.

    Initializes ``conv2d``, prints ``X`` both before and after two leading
    size-1 axes are prepended, applies the layer, and reshapes the output to
    its shape without the first two axes.

    Args:
        conv2d: Layer exposing ``initialize()`` and callable on the
            reshaped input.
        X: Input array; ``(1, 1)`` is prepended to its shape.

    Returns:
        The layer output reshaped to ``output.shape[2:]``.
    """
    conv2d.initialize()
    print(X)
    batched = X.reshape((1, 1) + X.shape)
    print(batched)
    out = conv2d(batched)
    return out.reshape(out.shape[2:])

In [10]:
# Case where the kernel's height and width are equal: a 3x3 kernel with
# padding 1 keeps the 8x8 input at 8x8.
conv2d=nn.Conv2D(1,kernel_size=3,padding=1)
X=nd.random.uniform(shape=(8,8))
comp_conv2d(conv2d,X)


[[0.1709866  0.7936977  0.46345097 0.22392468 0.87457293 0.34535167
  0.94411975 0.9280813 ]
 [0.6082529  0.7044144  0.5966554  0.03183892 0.78364426 0.16469416
  0.5000263  0.6214784 ]
 [0.05037006 0.5772286  0.69909805 0.23789282 0.9923964  0.934214
  0.26726255 0.6139659 ]
 [0.6790906  0.5356328  0.8642814  0.58991    0.75084424 0.730122
  0.96448976 0.311945  ]
 [0.55424243 0.39822108 0.2123905  0.20984375 0.22244322 0.186193
  0.21874937 0.9443724 ]
 [0.5695735  0.73955077 0.45210904 0.49045882 0.97023666 0.22741462
  0.6805447  0.25435647]
 [0.08529557 0.05802916 0.05641833 0.43441662 0.4878377  0.3117959
  0.8810046  0.6963435 ]
 [0.97640437 0.37775183 0.6176579  0.17960368 0.54249877 0.02467873
  0.8546136  0.06724963]]
<NDArray 8x8 @cpu(0)>

[[[[0.1709866  0.7936977  0.46345097 0.22392468 0.87457293 0.34535167
    0.94411975 0.9280813 ]
   [0.6082529  0.7044144  0.5966554  0.03183892 0.78364426 0.16469416
    0.5000263  0.6214784 ]
   [0.05037006 0.5772286  0.69909805 0.23789


[[0.06325529 0.08711027 0.05213947 0.04129013 0.07094524 0.04364694
  0.07483687 0.05889497]
 [0.04075016 0.09478176 0.10307626 0.07327566 0.13539585 0.11204958
  0.09452803 0.1058052 ]
 [0.07118268 0.12013233 0.12749954 0.09354834 0.1206032  0.12827429
  0.09391213 0.077944  ]
 [0.06186286 0.06422949 0.08159543 0.06125565 0.06986945 0.0949606
  0.10086315 0.08174755]
 [0.08508988 0.10460718 0.09291475 0.1102097  0.10998198 0.09299761
  0.10611159 0.08707771]
 [0.03711537 0.0514392  0.04428569 0.06568468 0.07720461 0.06555483
  0.10024891 0.08495331]
 [0.08007845 0.09201317 0.08373873 0.07328317 0.08486408 0.08106042
  0.09733039 0.05771422]
 [0.02810926 0.01039866 0.01587977 0.01460138 0.03855877 0.0233157
  0.05205503 0.0436906 ]]
<NDArray 8x8 @cpu(0)>

In [11]:
# Case where the kernel's height and width differ: a (5, 3) kernel with
# padding (2, 1) also keeps the 8x8 input at 8x8.
conv2d=nn.Conv2D(1,kernel_size=(5,3),padding=(2,1))
comp_conv2d(conv2d,X)


[[0.1709866  0.7936977  0.46345097 0.22392468 0.87457293 0.34535167
  0.94411975 0.9280813 ]
 [0.6082529  0.7044144  0.5966554  0.03183892 0.78364426 0.16469416
  0.5000263  0.6214784 ]
 [0.05037006 0.5772286  0.69909805 0.23789282 0.9923964  0.934214
  0.26726255 0.6139659 ]
 [0.6790906  0.5356328  0.8642814  0.58991    0.75084424 0.730122
  0.96448976 0.311945  ]
 [0.55424243 0.39822108 0.2123905  0.20984375 0.22244322 0.186193
  0.21874937 0.9443724 ]
 [0.5695735  0.73955077 0.45210904 0.49045882 0.97023666 0.22741462
  0.6805447  0.25435647]
 [0.08529557 0.05802916 0.05641833 0.43441662 0.4878377  0.3117959
  0.8810046  0.6963435 ]
 [0.97640437 0.37775183 0.6176579  0.17960368 0.54249877 0.02467873
  0.8546136  0.06724963]]
<NDArray 8x8 @cpu(0)>

[[[[0.1709866  0.7936977  0.46345097 0.22392468 0.87457293 0.34535167
    0.94411975 0.9280813 ]
   [0.6082529  0.7044144  0.5966554  0.03183892 0.78364426 0.16469416
    0.5000263  0.6214784 ]
   [0.05037006 0.5772286  0.69909805 0.23789


[[-0.00888047 -0.02277429 -0.04712824 -0.08536141 -0.00412371 -0.08338698
  -0.09505665 -0.04686656]
 [ 0.03326854  0.00289858  0.00458245  0.01253504  0.07885367  0.00546528
   0.03784898 -0.02732092]
 [ 0.05854703  0.00328338  0.02516939 -0.03242282 -0.04789305  0.00110136
  -0.0409156   0.05459823]
 [-0.00545369 -0.01507675 -0.02798554 -0.02966088  0.00888432 -0.07567502
   0.00566554  0.01936315]
 [ 0.0320082   0.0071927   0.04830979  0.09353697  0.03961273  0.08774883
   0.05352303 -0.03767506]
 [-0.01207006 -0.07034823 -0.0107752  -0.03641871 -0.01443569 -0.05020854
   0.1359678   0.02368721]
 [ 0.08420652  0.0671168   0.03546277  0.02387804 -0.00781548 -0.02139696
  -0.06520914 -0.08192366]
 [-0.10143605 -0.07095678 -0.00089312 -0.03817952  0.01155506  0.01632132
   0.01170317  0.0243714 ]]
<NDArray 8x8 @cpu(0)>

### 步长

In [12]:
# Same 3x3 kernel and padding 1, now with stride 2 on both axes: the 8x8
# input yields a 4x4 output.
conv2d=nn.Conv2D(1,kernel_size=3,padding=1,strides=2)
comp_conv2d(conv2d,X)


[[0.1709866  0.7936977  0.46345097 0.22392468 0.87457293 0.34535167
  0.94411975 0.9280813 ]
 [0.6082529  0.7044144  0.5966554  0.03183892 0.78364426 0.16469416
  0.5000263  0.6214784 ]
 [0.05037006 0.5772286  0.69909805 0.23789282 0.9923964  0.934214
  0.26726255 0.6139659 ]
 [0.6790906  0.5356328  0.8642814  0.58991    0.75084424 0.730122
  0.96448976 0.311945  ]
 [0.55424243 0.39822108 0.2123905  0.20984375 0.22244322 0.186193
  0.21874937 0.9443724 ]
 [0.5695735  0.73955077 0.45210904 0.49045882 0.97023666 0.22741462
  0.6805447  0.25435647]
 [0.08529557 0.05802916 0.05641833 0.43441662 0.4878377  0.3117959
  0.8810046  0.6963435 ]
 [0.97640437 0.37775183 0.6176579  0.17960368 0.54249877 0.02467873
  0.8546136  0.06724963]]
<NDArray 8x8 @cpu(0)>

[[[[0.1709866  0.7936977  0.46345097 0.22392468 0.87457293 0.34535167
    0.94411975 0.9280813 ]
   [0.6082529  0.7044144  0.5966554  0.03183892 0.78364426 0.16469416
    0.5000263  0.6214784 ]
   [0.05037006 0.5772286  0.69909805 0.23789


[[ 0.02798326  0.02309377  0.05334493  0.03356022]
 [ 0.02211411  0.04601283  0.01558262  0.04041121]
 [ 0.01420592 -0.01112313  0.0274049   0.05834443]
 [ 0.03116412  0.02038928  0.05861697  0.0773566 ]]
<NDArray 4x4 @cpu(0)>

In [22]:
# Kernel (3, 5), padding (0, 1) and strides (3, 5) applied to the 8x8 input.
conv2d=nn.Conv2D(1,kernel_size=(3,5),padding=(0,1),strides=(3,5))
comp_conv2d(conv2d,X)
# With strides of 1 the output shape would be (6, 6). With strides (3, 5)
# each axis gives floor((n + 2*p - k) / s) + 1 positions:
# height floor((8 + 0 - 3) / 3) + 1 = 2, width floor((8 + 2 - 5) / 5) + 1 = 2,
# so the output is 2x2.
# NOTE(review): the original note said positions that cannot be reached
# default to 0; by the output-shape formula, windows that do not fit are
# dropped rather than zero-filled - confirm against the Conv2D docs.


[[0.1709866  0.7936977  0.46345097 0.22392468 0.87457293 0.34535167
  0.94411975 0.9280813 ]
 [0.6082529  0.7044144  0.5966554  0.03183892 0.78364426 0.16469416
  0.5000263  0.6214784 ]
 [0.05037006 0.5772286  0.69909805 0.23789282 0.9923964  0.934214
  0.26726255 0.6139659 ]
 [0.6790906  0.5356328  0.8642814  0.58991    0.75084424 0.730122
  0.96448976 0.311945  ]
 [0.55424243 0.39822108 0.2123905  0.20984375 0.22244322 0.186193
  0.21874937 0.9443724 ]
 [0.5695735  0.73955077 0.45210904 0.49045882 0.97023666 0.22741462
  0.6805447  0.25435647]
 [0.08529557 0.05802916 0.05641833 0.43441662 0.4878377  0.3117959
  0.8810046  0.6963435 ]
 [0.97640437 0.37775183 0.6176579  0.17960368 0.54249877 0.02467873
  0.8546136  0.06724963]]
<NDArray 8x8 @cpu(0)>

[[[[0.1709866  0.7936977  0.46345097 0.22392468 0.87457293 0.34535167
    0.94411975 0.9280813 ]
   [0.6082529  0.7044144  0.5966554  0.03183892 0.78364426 0.16469416
    0.5000263  0.6214784 ]
   [0.05037006 0.5772286  0.69909805 0.23789


[[ 0.06721418 -0.0340129 ]
 [ 0.00882575  0.00317134]]
<NDArray 2x2 @cpu(0)>

## 多输入通道及多输出通道

### 多输入通道

In [1]:
from mxnet.gluon import nn
from mxnet import nd
import d2lzh as d2l

In [8]:
def corr2d_multi_in(X, K):
    """Cross-correlate a multi-channel input with a multi-channel kernel.

    ``X`` and ``K`` are iterated in lockstep along their first axis; each
    pair is passed to ``d2l.corr2d`` and the per-channel results are summed
    with ``nd.add_n``.

    Args:
        X: Input whose first axis indexes channels.
        K: Kernel whose first axis indexes the matching channels.

    Returns:
        The sum of the per-channel cross-correlations.
    """
    per_channel = []
    for channel, kernel in zip(X, K):
        per_channel.append(d2l.corr2d(channel, kernel))
    return nd.add_n(*per_channel)

In [9]:
# Two-channel 3x3 input and a matching two-channel 2x2 kernel; the summed
# per-channel cross-correlation is a single 2x2 array.
X = nd.array([[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
              [[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
K = nd.array([[[0, 1], [2, 3]], [[1, 2], [3, 4]]])

corr2d_multi_in(X, K)


[[ 56.  72.]
 [104. 120.]]
<NDArray 2x2 @cpu(0)>

### 多输出通道

In [10]:
def corr2d_multi_in_out(X, K):
    """Cross-correlate ``X`` with every kernel in ``K`` and stack the results.

    Each element along the first axis of ``K`` is passed with ``X`` to
    ``corr2d_multi_in``; the outputs are combined with ``nd.stack``.

    Args:
        X: Multi-channel input.
        K: Kernels; the first axis indexes output channels.

    Returns:
        The stacked per-output-channel results.
    """
    outputs = []
    for kernel in K:
        outputs.append(corr2d_multi_in(X, kernel))
    return nd.stack(*outputs)

In [11]:
# Start from the two-channel 2x2 kernel, then stack K, K+1 and K+2 to get a
# kernel with 3 output channels; the resulting shape is (3, 2, 2, 2).
K = nd.array([[[0, 1], [2, 3]], [[1, 2], [3, 4]]])
K=nd.stack(K,K+1,K+2)
K.shape

(3, 2, 2, 2)

In [13]:
# Apply the 3-output-channel kernel to the two-channel input; the result
# has one 2x2 array per output channel (shape 3x2x2).
corr2d_multi_in_out(X,K)


[[[ 56.  72.]
  [104. 120.]]

 [[ 76. 100.]
  [148. 172.]]

 [[ 96. 128.]
  [192. 224.]]]
<NDArray 3x2x2 @cpu(0)>

### 1×1卷积层

In [None]:
def corr2d_multi_in_out_1x1(X,K):
    """Multi-channel 1x1 cross-correlation computed as a matrix product.

    The input is flattened to ``(c_i, h * w)`` and the kernel to
    ``(c_o, c_i)``. Their matrix product is then reshaped back to
    ``(c_o, h, w)``. The result is the same as the one computed by
    ``corr2d_multi_in_out`` for a 1x1 kernel.

    Args:
        X: Input of shape ``(c_i, h, w)``.
        K: Kernel whose first axis has size ``c_o`` and which holds exactly
            ``c_o * c_i`` elements (e.g. shape ``(c_o, c_i, 1, 1)``).

    Returns:
        Array of shape ``(c_o, h, w)``.
    """
    c_i, h, w = X.shape
    c_o = K.shape[0]
    X = X.reshape((c_i, h * w))
    K = K.reshape((c_o, c_i))
    Y = nd.dot(K, X)
    # ``shape`` is a tuple attribute and cannot be called; ``reshape`` is
    # what restores the spatial dimensions.
    return Y.reshape((c_o, h, w))