## 定义网络


    input(N,1,28,28)=>conv(32,3,3)=>conv(32,3,3)=>relu=>max_pooling(2,2)=>conv(64,3,3)=>conv(64,3,3)=>global_avg_pooling=>fc(10)=>softmax

                    (N,32,26,26)  (N,32,24,24)           (N,32,12,12)      (N,64,10,10)   (N,64,8,8)      (N,64)       (N,10)   


In [1]:
def max_pooling_forward(z, pooling, strides=(2, 2), padding=(0, 0)):
    """
    Forward pass of a 2-D max-pooling layer.

    :param z: input feature maps, shape (N, C, H, W); N is the batch size, C the channel count
    :param pooling: pooling window size (k1, k2)
    :param strides: (vertical, horizontal) step between consecutive windows
    :param padding: (vertical, horizontal) count of zero rows/columns added on each side
    :return: float64 array of shape (N, C, out_h, out_w) holding the maximum of every window
    """
    N, C, H, W = z.shape
    # Zero-pad the two spatial axes only. np.pad is the public entry point;
    # the np.lib.pad alias is not available in NumPy 2.x.
    # NOTE: because the fill value is 0, a padded cell wins the max whenever
    # every real value inside a border window is negative.
    padding_z = np.pad(z,
                       ((0, 0), (0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
                       'constant', constant_values=0)

    # Output height and width
    out_h = (H + 2 * padding[0] - pooling[0]) // strides[0] + 1
    out_w = (W + 2 * padding[1] - pooling[1]) // strides[1] + 1

    pool_z = np.zeros((N, C, out_h, out_w))
    for i in range(out_h):
        top = strides[0] * i
        for j in range(out_w):
            left = strides[1] * j
            # One window for all samples and channels at once; reducing over
            # the two spatial axes yields an (N, C) slab of maxima.
            window = padding_z[:, :, top:top + pooling[0], left:left + pooling[1]]
            pool_z[:, :, i, j] = window.max(axis=(2, 3))
    return pool_z

In [2]:
import numpy as np
# Trainable parameters, cached activations and gradients.
# Kernels and the dense matrix start as small Gaussian noise (randn scaled by
# weights_scale); every bias starts at zero.
weights_scale = 1e-3
weights = {
    "K1": weights_scale * np.random.randn(1, 32, 3, 3),
    "b1": np.zeros(32),
    "K2": weights_scale * np.random.randn(32, 32, 3, 3),
    "b2": np.zeros(32),
    "K3": weights_scale * np.random.randn(32, 64, 3, 3),
    "b3": np.zeros(64),
    "K4": weights_scale * np.random.randn(64, 64, 3, 3),
    "b4": np.zeros(64),
    "W5": weights_scale * np.random.randn(64, 10),
    "b5": np.zeros(10),
}

# Filled by forward(): layer name -> activation
nuerons = dict()
# Filled by backward(): parameter / layer name -> gradient
gradients = dict()

In [3]:
from nn.layers import conv_forward,conv_backward,fc_forward,fc_backward
from nn.layers import max_pooling_backward,global_avg_pooling_forward,global_avg_pooling_backward #,max_pooling_forward
from nn.activations import relu_forward,relu_backward
from nn.losses import cross_entropy_loss
# Forward pass
def forward(X):
    """
    Run the network on a batch and cache every intermediate activation.

    Layer order: conv1 -> conv2 -> relu -> 2x2 max pooling -> conv3 -> conv4
    -> global average pooling -> fully connected. Each layer output is stored
    in the module-level ``nuerons`` dict under the layer's name, where
    ``backward`` reads it back.

    :param X: input batch passed to the first convolution
    :return: output of the fully connected layer (also stored as nuerons["y"])
    """
    conv1_out = conv_forward(X, weights["K1"], weights["b1"])
    nuerons["conv1"] = conv1_out

    conv2_out = conv_forward(conv1_out, weights["K2"], weights["b2"])
    nuerons["conv2"] = conv2_out

    relu_out = relu_forward(conv2_out)
    nuerons["conv2_relu"] = relu_out

    pooled = max_pooling_forward(relu_out, pooling=(2, 2))
    nuerons["maxp"] = pooled

    conv3_out = conv_forward(pooled, weights["K3"], weights["b3"])
    nuerons["conv3"] = conv3_out

    conv4_out = conv_forward(conv3_out, weights["K4"], weights["b4"])
    nuerons["conv4"] = conv4_out

    averaged = global_avg_pooling_forward(conv4_out)
    nuerons["gavgp"] = averaged

    logits = fc_forward(averaged, weights["W5"], weights["b5"])
    nuerons["y"] = logits

    # Debug trace emitted on every call
    print("conv4.shape:{}".format(conv4_out.shape))
    return logits

In [4]:
# Backward pass
def backward(X, y_true):
    """
    Back-propagate the loss through the network, last layer first.

    Reads the activations that ``forward`` cached in ``nuerons`` and writes one
    entry per parameter and per intermediate layer into the module-level
    ``gradients`` dict. The shape of each intermediate gradient is printed as
    a debug trace.

    :param X: the input batch that was given to ``forward``
    :param y_true: target labels passed to cross_entropy_loss
    :return: the loss value returned by cross_entropy_loss
    """
    loss, dy = cross_entropy_loss(nuerons["y"], y_true)
    print("dy:{}".format(dy.shape))

    # Fully connected layer
    grad_w5, grad_b5, grad_gavgp = fc_backward(dy, weights["W5"], nuerons["gavgp"])
    gradients["W5"] = grad_w5
    gradients["b5"] = grad_b5
    gradients["gavgp"] = grad_gavgp
    print("gavgp:{}".format(grad_gavgp.shape))

    # Global average pooling
    grad_conv4 = global_avg_pooling_backward(grad_gavgp, nuerons["conv4"])
    gradients["conv4"] = grad_conv4
    print("conv4:{}".format(grad_conv4.shape))

    # conv4
    grad_k4, grad_b4, grad_conv3 = conv_backward(grad_conv4, weights["K4"], nuerons["conv3"])
    gradients["K4"] = grad_k4
    gradients["b4"] = grad_b4
    gradients["conv3"] = grad_conv3
    print("conv3:{}".format(grad_conv3.shape))

    # conv3
    grad_k3, grad_b3, grad_maxp = conv_backward(grad_conv3, weights["K3"], nuerons["maxp"])
    gradients["K3"] = grad_k3
    gradients["b3"] = grad_b3
    gradients["maxp"] = grad_maxp
    print("maxp:{}".format(grad_maxp.shape))

    # Max pooling
    grad_relu = max_pooling_backward(grad_maxp, nuerons["conv2_relu"], pooling=(2, 2))
    gradients["conv2_relu"] = grad_relu
    print("conv2_relu:{}".format(grad_relu.shape))

    # ReLU
    grad_conv2 = relu_backward(grad_relu, nuerons["conv2"])
    gradients["conv2"] = grad_conv2
    print("conv2:{}".format(grad_conv2.shape))

    # conv2
    grad_k2, grad_b2, grad_conv1 = conv_backward(grad_conv2, weights["K2"], nuerons["conv1"])
    gradients["K2"] = grad_k2
    gradients["b2"] = grad_b2
    gradients["conv1"] = grad_conv1
    print("conv1:{}".format(grad_conv1.shape))

    # conv1: the gradient with respect to the input batch is not kept
    grad_k1, grad_b1, _ = conv_backward(grad_conv1, weights["K1"], X)
    gradients["K1"] = grad_k1
    gradients["b1"] = grad_b1
    return loss

In [8]:
# Classification accuracy
def get_accuracy(X, y_true):
    """
    Fraction of samples whose predicted class matches the target class.

    The predicted class is the arg-max of ``forward(X)`` along the last axis;
    the target class is the arg-max of ``y_true`` along the last axis.

    :param X: input batch passed to ``forward``
    :param y_true: target scores, one row per sample
    :return: mean of the element-wise equality between the two index arrays
    """
    scores = forward(X)
    predicted_labels = np.argmax(scores, axis=-1)
    target_labels = np.argmax(y_true, axis=-1)
    matches = np.equal(predicted_labels, target_labels)
    return np.mean(matches)

In [5]:
from nn.load_mnist import load_mnist_datasets
from nn.utils import to_categorical
train_set, val_set, test_set = load_mnist_datasets('mnist.pkl.gz')

# Element 0 of each split is reshaped to (num_samples, 1, 28, 28)
train_x = np.reshape(train_set[0], (-1, 1, 28, 28))
val_x = np.reshape(val_set[0], (-1, 1, 28, 28))
test_x = np.reshape(test_set[0], (-1, 1, 28, 28))

# Element 1 of each split goes through to_categorical
# (presumably integer labels -> one-hot rows; confirm in nn.utils)
train_y = to_categorical(train_set[1])
val_y = to_categorical(val_set[1])
test_y = to_categorical(test_set[1])

In [6]:
# Random selection of training samples
train_num = train_set[0].shape[0]


def next_batch(batch_size):
    """
    Draw a random mini-batch from the training set.

    Row indices come from ``np.random.choice(train_num, batch_size)``, which
    samples with replacement by default, so a row can appear more than once.

    :param batch_size: number of samples to draw
    :return: tuple (train_x rows, train_y rows) for the drawn indices
    """
    picked = np.random.choice(train_num, batch_size)
    batch_x = train_x[picked]
    batch_y = train_y[picked]
    return batch_x, batch_y

# Smoke test: draw one batch of 16 and show the resulting shapes
x, y = next_batch(batch_size=16)
print("x.shape:{},y.shape:{}".format(x.shape, y.shape))

x.shape:(16, 1, 28, 28),y.shape:(16, 10)


In [7]:
from nn.optimizers import SGD
# Hyper-parameters
batch_size = 2
epoch = 3
lr = 0.1
# Number of parameter updates per epoch
steps = train_num // batch_size

for e in range(epoch):
    for s in range(steps):
        X, y = next_batch(batch_size)

        # Forward pass caches activations; backward pass fills `gradients`
        forward(X)
        loss = backward(X, y)

        # Plain gradient-descent step, applied in place to every parameter
        for k in weights:
            weights[k] -= lr * gradients[k]

        # Report only on every 10th step
        if s % 10 != 0:
            continue
        print("\n epoch:{} step:{} ; loss:{}".format(e, s, loss))
        # Accuracy on the current batch, then on the whole validation set
        print(" train_acc:{};  val_acc:{}".format(get_accuracy(X, y),
                                                  get_accuracy(val_x, val_y)))


print("\n final result test_acc:{};  val_acc:{}".format(
    get_accuracy(test_x, test_y),
    get_accuracy(val_x, val_y)))

conv4.shape:(2, 64, 8, 8)
dy:(2, 10)
gavgp:(2, 64)
conv4:(2, 64, 8, 8)
conv3:(2, 64, 10, 10)
maxp:(2, 32, 12, 12)
conv2_relu:(2, 32, 24, 24)
conv2:(2, 32, 24, 24)
conv1:(2, 32, 26, 26)

 epoch:0 step:0 ; loss:2.3025850930358502


NameError: name 'get_accuracy' is not defined