# 卷积神经网络

## 二维互相关运算

In [None]:
from mxnet import autograd as ag
from mxnet import nd
from mxnet.gluon import nn

In [None]:
def corr2d(X,K):
    """Compute the 2-D cross-correlation of input ``X`` with kernel ``K``.

    Args:
        X: 2-D input array.
        K: 2-D kernel array.

    Returns:
        Array of shape ``(X.shape[0]-h+1, X.shape[1]-w+1)`` where ``(h, w)``
        is ``K.shape``. Element ``[i, j]`` is the sum of the element-wise
        product of ``K`` with the ``h x w`` window of ``X`` whose top-left
        corner is at ``(i, j)``.
    """
    h, w = K.shape
    Y = nd.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
    # Slide the kernel over every position where it fits entirely inside X.
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i:i + h, j:j + w] * K).sum()
    return Y

In [None]:
# Worked example: cross-correlate a 3x3 input with a 2x2 kernel.
X = nd.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
K = nd.array([[0, 1], [2, 3]])
corr2d(X, K)

## 二维卷积层

In [None]:
class Conv2d(nn.Block):
    """Hand-written 2-D convolution layer built on ``corr2d``.

    Registers a ``weight`` parameter of shape ``kernel_size`` and a ``bias``
    parameter of shape ``(1,)``.
    """

    def __init__(self, kernel_size, **kwargs):
        super(Conv2d, self).__init__(**kwargs)
        registry = self.params
        self.weight = registry.get('weight', shape=kernel_size)
        self.bias = registry.get('bias', shape=(1,))

    def forward(self, x):
        """Return the cross-correlation of ``x`` with the weight, plus the bias."""
        correlated = corr2d(x, self.weight.data())
        return correlated + self.bias.data()

## 通过数据学习核数组

In [None]:
from mxnet import nd
from mxnet import autograd as ag
from mxnet.gluon import nn
from mxnet import gluon

# Gluon convolution layer with 1 output channel and a (1, 2) kernel, to be fitted below.
conv2d=nn.Conv2D(1,kernel_size=(1,2))
conv2d.initialize()

# Disabled 4-D variant, kept for reference:
##X=nd.ones((1,1,6,8))
##X[:,:,:,2:6]=0
##K=nd.array([[1,-1]])
##Y=corr2d(X,K)
# corr2d only works on 2-D arrays, so the data is built in 2-D and reshaped afterwards.
X=nd.ones((6,8))
X[:,4:5]=0
K=nd.array([[1,-1]])
# Target output produced with the known kernel K.
Y=corr2d(X,K)
print(X,K,Y)
# Reshape input and target to 4-D before passing them to the Gluon layer.
X=X.reshape((1,1,6,8))
Y=Y.reshape((1,1,6,7))

# Ten manual gradient-descent steps on the squared error between Y and the layer output.
for i in range(10):
    with ag.record():
        Y_hat=conv2d(X)
        l=(Y-Y_hat)**2
    l.backward()
    # In-place weight update with step size 16e-3.
    conv2d.weight.data()[:]-=16e-3 * conv2d.weight.grad()
    print('NO.%d, loss:%.3f'%(i+1,l.sum().asscalar()))

In [None]:
# Show the layer's weight after the training loop, reshaped to (1, 2).
print(conv2d.weight.data().reshape((1,2)))

## 填充与步长

### 填充

In [None]:
from mxnet import nd
from mxnet.gluon import nn

def comp_conv2d(conv2d,X):
    """Initialise ``conv2d``, apply it to ``X`` and return the result.

    ``X`` is printed, reshaped to ``(1, 1) + X.shape`` and printed again
    before being passed to the layer. The first two axes of the layer output
    are dropped from the returned array.
    """
    conv2d.initialize()
    print(X)
    # Prepend two axes of size 1 in front of the original shape.
    batched = X.reshape((1, 1) + X.shape)
    print(batched)
    out = conv2d(batched)
    trailing_shape = out.shape[2:]
    return out.reshape(trailing_shape)

In [None]:
# Kernel with equal height and width: 3x3 kernel, padding 1, applied to a random 8x8 input.
conv2d=nn.Conv2D(1,kernel_size=3,padding=1)
X=nd.random.uniform(shape=(8,8))
comp_conv2d(conv2d,X)

In [None]:
# Kernel with different height and width: (5, 3) kernel with padding (2, 1).
conv2d=nn.Conv2D(1,kernel_size=(5,3),padding=(2,1))
comp_conv2d(conv2d,X)

### 步长

In [None]:
# Same 3x3 kernel and padding 1 as before, now with a stride of 2 on both axes.
conv2d=nn.Conv2D(1,kernel_size=3,padding=1,strides=2)
comp_conv2d(conv2d,X)

In [None]:
# Non-square kernel (3, 5) with padding (0, 1) and strides (3, 5).
conv2d=nn.Conv2D(1,kernel_size=(3,5),padding=(0,1),strides=(3,5))
comp_conv2d(conv2d,X)
# For the 8x8 X created earlier, a stride of 1 would give a (6, 6) output.
# NOTE(review): with strides (3, 5) each axis should be floor((n + 2*p - k) / s) + 1,
# i.e. floor(5/3)+1 = 2 and floor(5/5)+1 = 2. The original note attributed this to
# out-of-range values defaulting to 0; presumably windows that do not fit are simply
# dropped instead - confirm against the Conv2D documentation.

## 多输入通道及多输出通道

### 多输入通道

In [None]:
from mxnet.gluon import nn
from mxnet import nd
import d2lzh as d2l

In [None]:
def corr2d_multi_in(X,K):
    """Cross-correlate a multi-channel input with a multi-channel kernel.

    ``X`` and ``K`` are iterated in lockstep along their first axis; each
    pair is passed to ``d2l.corr2d`` and the per-pair results are summed
    with ``nd.add_n``.
    """
    per_channel = []
    for x_c, k_c in zip(X, K):
        per_channel.append(d2l.corr2d(x_c, k_c))
    return nd.add_n(*per_channel)

In [None]:
# Example: input with 2 channels of 3x3 and a kernel with 2 channels of 2x2.
X = nd.array([[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
              [[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
K = nd.array([[[0, 1], [2, 3]], [[1, 2], [3, 4]]])

corr2d_multi_in(X, K)

### 多输出通道

In [None]:
def corr2d_multi_in_out(X,K):
    """Cross-correlate ``X`` with every kernel in ``K`` and stack the results.

    ``K`` is iterated along its first axis; each slice is applied to ``X``
    with ``corr2d_multi_in`` and the outputs are combined with ``nd.stack``.
    """
    channel_maps = []
    for kernel in K:
        channel_maps.append(corr2d_multi_in(X, kernel))
    return nd.stack(*channel_maps)

In [None]:
# Build a kernel with 3 output channels by stacking K, K+1 and K+2 along a new first axis.
K = nd.array([[[0, 1], [2, 3]], [[1, 2], [3, 4]]])
K=nd.stack(K,K+1,K+2)
K.shape

In [None]:
# Apply the stacked multi-output kernel to the multi-channel input.
corr2d_multi_in_out(X,K)

### 1X1卷积层

In [None]:
def corr2d_multi_in_out_1x1(X,K):
    """Apply a 1x1 multi-input, multi-output kernel as a matrix product.

    Args:
        X: Input of shape ``(c_i, h, w)``.
        K: Kernel whose first axis has length ``c_o`` and which can be
            reshaped to ``(c_o, c_i)``.

    Returns:
        Array of shape ``(c_o, h, w)``. This is intended to match what
        ``corr2d_multi_in_out`` computes for the same 1x1 kernel.
    """
    c_i, h, w = X.shape
    c_o = K.shape[0]
    # Flatten the spatial axes so each pixel becomes one column of length c_i.
    X = X.reshape((c_i, h * w))
    K = K.reshape((c_o, c_i))
    Y = nd.dot(K, X)
    # `shape` is an attribute, not a method; restore the spatial layout with reshape.
    return Y.reshape((c_o, h, w))

## 池化层

In [None]:
from mxnet import nd
from mxnet.gluon import nn

def pool2d(X,pool_size,mode='max'):
    """Apply 2-D max or average pooling with stride 1 and no padding.

    Args:
        X: 2-D input array.
        pool_size: ``(p_h, p_w)`` height and width of the pooling window.
        mode: ``'max'`` for max pooling or ``'avg'`` for average pooling.

    Returns:
        Array of shape ``(X.shape[0]-p_h+1, X.shape[1]-p_w+1)`` whose element
        ``[i, j]`` is the max (or mean) of the ``p_h x p_w`` window of ``X``
        with top-left corner ``(i, j)``.

    Raises:
        ValueError: If ``mode`` is neither ``'max'`` nor ``'avg'``.
    """
    if mode not in ('max', 'avg'):
        raise ValueError("mode must be 'max' or 'avg', got %r" % (mode,))
    p_h, p_w = pool_size
    Y = nd.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            # The window must be read from the input X (not the output Y),
            # and its width is p_w, not p_h.
            window = X[i:i + p_h, j:j + p_w]
            if mode == 'max':
                Y[i, j] = window.max()
            else:
                Y[i, j] = window.mean()
    return Y

In [None]:
# 4-D input of shape (1, 1, 4, 4) holding the values 0..15.
X=nd.arange(16).reshape((1,1,4,4))

# By default, a MaxPool2D instance uses a stride equal to its pooling window shape.
# NOTE: this rebinds the name pool2d, shadowing the pool2d function defined earlier in this file.
pool2d=nn.MaxPool2D(3)
pool2d(X)

In [None]:
# Stride and padding can also be specified explicitly.
pool2d=nn.MaxPool2D(3,padding=1,strides=2)
pool2d(X)

In [None]:
# A non-square pooling window can be specified, with per-axis padding and strides.
pool2d=nn.MaxPool2D((2,3),padding=(1,2),strides=(2,3))
pool2d(X)

In [None]:
##多通道的池化
##在处理多通道输入数据时，池化层对每个输入通道分别池化，而不是像卷积层那样将各通道的输入按通道相加。
##这意味着池化层的输出通道数与输入通道数相等。

In [None]:
# Concatenate X and X+1 along axis 1 to build an input with two entries on that axis.
X=nd.concat(X,X+1,dim=1)
X

In [None]:
# Pool the concatenated input with a 3x3 window, padding 1 and stride 2.
pool2d=nn.MaxPool2D(3,padding=1,strides=2)
pool2d(X)

## 卷积神经网络LeNet

卷积层块里的基本单位是卷积层后接最大池化层：卷积层用来识别图像里的空间模式，最大池化层用来降低卷积层对位置的敏感性。

### 构建模型

In [1]:
from mxnet.gluon import loss as gloss,nn
from mxnet import autograd as ag, nd, init, gluon
import d2lzh as d2l

In [22]:
# Build a LeNet-style network: two conv + max-pool stages followed by three dense layers.
# NOTE(review): the second Conv2D uses channels=3 here, while the network defined in the
# next cell uses 16; this looks like a typo or an experiment - confirm. This `net` is
# overwritten by the next cell.
net=nn.Sequential()
net.add(nn.Conv2D(channels=6, kernel_size=5, activation="sigmoid"),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=3, kernel_size=5, activation='sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2),
        # By default Dense converts a (batch, channels, height, width) input
        # into (batch, channels*height*width).
        nn.Dense(120, activation='sigmoid'),
        nn.Dense(84, activation="sigmoid"),
        nn.Dense(10))

In [33]:
# Network actually used below: conv(6) -> pool -> conv(16) -> pool -> dense(256) -> dense(84) -> dense(10).
# All conv and hidden dense layers use a sigmoid activation; the final Dense(10) has none.
net=nn.Sequential()
net.add(nn.Conv2D(channels=6, kernel_size=5, activation='sigmoid'),
       nn.MaxPool2D(pool_size=2, strides=2),
       nn.Conv2D(channels=16, kernel_size=5, activation='sigmoid'),
       nn.MaxPool2D(pool_size=2, strides=2),
       nn.Dense(256, activation='sigmoid'),
       nn.Dense(84, activation='sigmoid'),
       nn.Dense(10))

In [34]:
# Random input of shape (1, 1, 28, 28) used to trace layer output shapes; initialise the net.
X=nd.random.uniform(shape=(1,1,28,28))
net.initialize()

In [35]:
# Feed X through the layers one at a time, printing each layer's name and output shape.
# Note that X is rebound to the output of every layer.
for layer in net:
    X=layer(X)
    print(layer.name, X.shape)

conv4 (1, 6, 24, 24)
pool4 (1, 6, 12, 12)
conv5 (1, 16, 8, 8)
pool5 (1, 16, 4, 4)
dense6 (1, 256)
dense7 (1, 84)
dense8 (1, 10)


### 训练模型

In [36]:
# Load the Fashion-MNIST train/test iterators via d2l with a batch size of 256.
batch_size=256
train_iter,test_iter=d2l.load_data_fashion_mnist(batch_size=batch_size)

In [37]:
def evaluate_accuracy(data_iter,net):
    """Return the fraction of examples in ``data_iter`` that ``net`` classifies correctly.

    ``data_iter`` yields ``(X, y)`` batches. The prediction for each example
    is the argmax of ``net(X)`` along axis 1, compared against ``y`` cast to
    float32.
    """
    # The accumulators are created once, outside the loop; creating them
    # inside the loop would reset them on every batch.
    correct = 0.0
    seen = 0
    for features, labels in data_iter:
        scores = net(features)
        labels = labels.astype('float32')
        hits = labels == scores.argmax(axis=1)
        correct += hits.sum().asscalar()
        seen += labels.size
    return correct / seen

In [38]:
def entropy_loss(y_hat,y):
    """Return the negative log of the entries of ``y_hat`` selected by ``y``.

    Original author's note: because the network's forward computation and
    this cross-entropy are evaluated separately, overflow occurs easily.
    """
    picked = nd.pick(y_hat, y)
    return -picked.log()

def sgd(lr,params,batch_size):
    """Update every parameter in place with one mini-batch SGD step.

    Each element of ``params`` must expose a ``grad`` attribute; it is
    decreased in place by ``lr * grad / batch_size``.
    """
    for p in params:
        step = lr * p.grad / batch_size
        p[:] -= step

def train_ch5(net,train_iter,test_iter,trainer,lr,num_epochs,batch_size):
    """Train ``net`` for ``num_epochs`` epochs, printing accuracy after each one.

    Args:
        net: Model called as ``net(X)`` on each batch.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches passed to ``evaluate_accuracy``.
        trainer: Object whose ``step(batch_size)`` applies the parameter update.
        lr: Not referenced inside this function.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Value forwarded to ``trainer.step``.
    """
    criterion = gloss.SoftmaxCrossEntropyLoss()
    for epoch_idx in range(num_epochs):
        seen, correct = 0, 0.0
        for features, labels in train_iter:
            with ag.record():
                scores = net(features)
                batch_loss = criterion(scores, labels).sum()
            # Original author's note: printing entropy_loss(scores, labels) at
            # this point showed overflow.
            batch_loss.backward()
            # Original author's note: the parameters of this network come as a
            # special parameter dictionary, so the update is delegated to the
            # Gluon trainer instead of a hand-written sgd(...) call.
            trainer.step(batch_size)
            labels = labels.astype("float32")
            # `scores` is the output of the forward pass made before trainer.step.
            matches = scores.argmax(axis=1) == labels
            correct += matches.sum().asscalar()
            seen += labels.size
        test_acc = evaluate_accuracy(test_iter, net)
        print('N0.%d, train_acc:%.3f , test_acc:%.3f '%(epoch_idx+1,correct/seen,test_acc))

# Re-initialise the network with Xavier initialisation, then train for 5 epochs
# using Gluon's built-in 'sgd' trainer with a learning rate of 0.9.
net.initialize(force_reinit=True,init=init.Xavier())
lr,num_epochs=0.9,5
trainer=gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':lr})
train_ch5(net,train_iter,test_iter,trainer,lr,num_epochs,batch_size)

N0.1, train_acc:0.101 , test_acc:0.100 
N0.2, train_acc:0.261 , test_acc:0.550 
N0.3, train_acc:0.617 , test_acc:0.686 
N0.4, train_acc:0.701 , test_acc:0.700 
N0.5, train_acc:0.735 , test_acc:0.761 


In [None]:
# IPython-only syntax (not valid plain Python): show the help/source for gluon.Trainer.
gluon.Trainer??

In [21]:
# IPython-only syntax (not valid plain Python): show the help/source for gloss.SoftmaxCrossEntropyLoss.
gloss.SoftmaxCrossEntropyLoss??

## 深度神经网络AlexNet