In [1]:
import pickle

import numpy as np
import struct
import math
import os

In [2]:
# 加载minist数据集
# 加载数据集这方面的工作参考了前人的工作，网页为：https://blog.csdn.net/hxxjxw/article/details/113727973
def load_mnist(mnistdir , train):
    ministfile = open(mnistdir,'rb')
    ministdata = ministfile.read()
    ministfile.close()
    rows=1
    cols=1
    # 加载训练集
    if train:
        # 解析文件头信息，依次为魔数、图片数量、每张图片高、每张图片宽
        # 因为数据结构中前4行的数据类型都是32位整型，所以采用i格式，但我们需要读取前4行数据，所以需要4个i。我们后面会看到标签集中，只使用2个ii
        magic_num,images,rows,cols = struct.unpack_from('>iiii', ministdata,0)
    else:
        # 加载标签集
        magic_num,images = struct.unpack_from('>ii', ministdata,0)
    print('魔数:%d, 图片数量: %d张, 图片大小: %d*%d' % (magic_num, images, rows, cols))
    # 计算加载的总像素是多少
    size = images * rows * cols
    # calcsize获得数据在缓存中的指针位置，从前面介绍的数据结构可以看出，读取了前4行之后，指针位置（即偏移位置offset）指向0016
    if train:
        pointer = struct.calcsize('>iiii')
    else :
        pointer =  struct.calcsize('>ii')
    pack_data = struct.unpack_from('>' + str(size) + 'B', ministdata,pointer)
    if train:
        pack_data = np.reshape(pack_data,[images,rows,cols])
    else:
        pack_data = np.reshape(pack_data,[images])
    # 最终返回了一个矩阵，矩阵的大小由是训练集还是标签集决定
    # 训练集就相当于返回好多页纸，每一页纸上面有对应的行数和列数
    print('本次解析的矩阵格式为[%d,%d,%d]' % (images,rows,cols))
    return pack_data

In [3]:
# 在本函数中完成了本地的数据加载
# 根据参数取出相应的验证集并做正则化
def load_minist_data():
    trainImages = load_mnist("data/train-images.idx3-ubyte",True)
    trainLabels = load_mnist("data/train-labels.idx1-ubyte",False)
    testImages = load_mnist("data/t10k-images.idx3-ubyte",True)
    testLabels = load_mnist("data/t10k-labels.idx1-ubyte",False)
    # 其实这里不知道为什么要pad，但还是先pad一下
    # 我觉得可能是因为卷积的时候不丢失信息吧
    # 对矩阵进行进行填充
    # https://blog.csdn.net/qq_34650787/article/details/80500407
    trainImages = np.pad(trainImages, ((0, 0), (2, 2), (2, 2)))
    testImages = np.pad(testImages, ((0, 0), (2, 2), (2, 2)))

    # 获取矩阵维度
    real_num, real_rows, real_cols = trainImages.shape
    real_num2,real_rows2,real_cols2 = testImages.shape
    # print('real_rows=%d,real_cols=%d' % (real_rows,real_cols))

    # 这个类型转换我也搞不太懂
    # 而且也不知道这个reshape的目的在哪里
    # 进行类型转换 https://blog.csdn.net/u012267725/article/details/77489244
    # 我的理解是在这里把很多页书拼起来拼成一页，但是这一页很长
    # print(trainImages.shape) #:这里他的输出是（60000,32,32）
    trainImages = trainImages.astype(np.float32).reshape(real_num, 1, real_rows, real_cols)
    testImages = testImages.astype(np.float32).reshape(real_num2, 1, real_rows, real_cols)
    # print(trainImages.shape) #:在这里输出就变成了（60000,1,32,32）

    # 接下来在训练集中划分验证集
    # 这里的参数是可以调整的
    varProof = -500 # 取最后的500个
    proofImages = trainImages[varProof:]
    proofLabels = trainLabels[varProof:]
    trainImages = trainImages[:varProof]
    trainLabels = trainLabels[:varProof]

    # 对数据进行归一化，这里其实也可以做一个参数选项，可选可不选
    # https://blog.csdn.net/sdgfbhgfj/article/details/123780347
    if True:
        mean = np.mean(trainImages,axis=0)
    else:
        mean = np.zeros_like(trainImages)
    trainImages -= mean
    proofImages -= mean
    testImages -= mean
    print('加载完毕')
    # 依次返回训练数据&标签 验证数据&标签 测试数据&标签
    return trainImages,trainLabels,proofImages,proofLabels,testImages,testLabels

In [4]:
# 在这个cell实现随机梯度下降
"""
随机梯度下降算法（Stochastic gradient descent，SGD）在神经网络模型训练中，是一种很常见的优化算法。
这个算法的流程就是在每次更新的时候使用一个样本进行梯度下降，所谓的随机二字，就是说我们可以随机用一个样本来表示所有的样本，来调整超参数θ
程序中的参数命名来源于下面的博客：
https://blog.csdn.net/Oscar6280868/article/details/90641638
"""
def sgd(theta,xj,parameters):
    if parameters is None:
        # 创建学习率参数
        parameters = {'learn_rate': 1e-2}
    theta = theta - parameters['learn_rate'] * xj
    return theta,parameters

In [5]:
# 实现一个自适应动量的随机优化方法
# 我觉得到时候看看能不能把这个删掉...太明显了
"""
论文：https://arxiv.org/pdf/1412.6980.pdf
参数说明：
t t：更新的步数（steps）
α lphaα：学习率，用于控制步幅（stepsize）
θ hetaθ：要求解（更新）的参数
β1 一阶矩衰减系数
β2 二阶矩衰减系数
f（θ） 目标函数
g 目标函数对θ求导所得梯度
m 梯度g的一阶矩
v 梯度g的二阶矩

参考的博客：https://blog.csdn.net/sinat_36618660/article/details/100026261
"""
def adam(theta,xj,parameters):
    if parameters is None:
        parameters={}
    parameters.setdefault("learn_rate", 1e-3)
    parameters.setdefault("beta1", 0.9)
    parameters.setdefault("beta2", 0.999)
    parameters.setdefault("e", 1e-8)
    parameters.setdefault("m", np.zeros_like(theta))
    parameters.setdefault("v", np.zeros_like(theta))
    parameters.setdefault("t", 0)


    b1=parameters['beta1']
    b2=parameters['beta2']
    parameters['t'] = parameters['t'] + 1
    parameters['m'] = ((1 - b1) * xj) + (b1 * parameters['m'])
    parameters['v'] = ((1 - b2) * (xj ** 2)) + (b2 * parameters['v'])
    temp1 = 1 - (b1 ** parameters['t'])
    temp2 = 1 - (b2 ** parameters['t'])
    new_theta = theta - parameters['learn_rate'] * (parameters['m'] / temp1) / (np.sqrt(parameters['v'] / temp2) + parameters['e'])
    return new_theta,parameters

In [6]:

class Train:
    """
    Lenet5 相当于传进来一个类，这个类保存了一次训练需要的信息
    data
    epoch 所有样本都跑过一遍所需次数
    func 选择哪个参数优化函数
    parameter
    decrease 学习率下降数
    batchSize 每次只使用数据集中的部分样本
    exp
    """
    # 这里其实可以初始化一下
    def __init__(self, Lenet5, data, epoch, func, parameter, decrease, batchSize, exp):
        self.model = Lenet5
        self.train1 = data["train1"] #在传进来data前也得注意
        self.train2 = data["train2"]
        self.label1 = data["label1"]
        self.label2 = data["label2"]
        self.func = func #让他指向了指定的那个函数
        self.learn_rate_decay = decrease #学习率下降
        self.batchSize = batchSize #一次使用的样本数
        self.exp = exp #不知道干啥用的 完全没用到
        self.epoch = epoch

        # 保存最高准确率以及最佳参数
        self.bestAccurate = 0
        self.bestPara = {}

        # 为了绘图
        self.losses = []
        self.TrainAccurates = []
        self.ProofAccurates = []

        # 按照他的意思这个东西是深度拷贝
        self.funcParameter = {}
        for i in self.model.para:
            d = {k: v for k,v in parameter.items()}
            self.funcParameter[i] = d

    # 直接返回向量
    def calAccuracy(self,train,label,batchSize):
        # 需要做几轮运算
        myRound = math.ceil(train.shape[0] / batchSize)
        # 最终的预测结果
        prediction = []
        for i in range(myRound):
            start = i*batchSize
            end = (i+1)*batchSize
            # 这个不太懂loss算了个什么
            # 为什么只用给train就行了，还有就是train是什么，最后数组保存的是什么
            # 实际loss返回两个值
            acc = self.model.spread2(train[start:end]) #计算损失
            p = np.argmax(acc,axis=1) #找出最大值
            prediction.append(p)
        prediction = np.hstack(prediction) #横向拼接向量
        return prediction

    def calAccuracy2(self,train,label,batchSize):
        prediction = self.calAccuracy(train, label, batchSize)
        return np.mean(prediction == label)

    def myTrain(self):
        # 为了绘图 把这几个先初始化为空
        self.losses = []
        self.TrainAccurates = []
        self.ProofAccurates = []
        # 按照不同的轮以对应的batchsize训练
        for i in range(self.epoch):
            for j in range(math.ceil(self.train1.shape[0] / self.batchSize)-1):
                start = j * self.batchSize
                end = (j+1) * self.batchSize
                arr = np.arange(start,end)
                batch_train = self.train1[arr]
                batch_label = self.label1[arr]
                # 我觉得在算loss的时候就更新了model的params
                loss,res = self.model.spread(batch_train,batch_label)
                self.losses.append(loss)
                for k,v in self.model.para.items():
                    if self.func =='sgd':
                        self.model.para[k],self.funcParameter[k] = sgd(v,res[k],self.funcParameter[k])
                    else:
                        self.model.para[k],self.funcParameter[k] = adam(v,res[k],self.funcParameter[k])
                print('第 %d 轮第 %d 个batch中loss为 %f' % (i+1,j+1,loss))

            a = self.calAccuracy2(self.train1,self.label1,100)
            b = self.calAccuracy2(self.train2,self.label2,100)
            self.TrainAccurates.append(a)
            self.ProofAccurates.append(b)

            print('第 %d 次迭代，训练集正确率 %f 验证集正确率 %f' % (i+1,a,b))

            if b > self.bestAccurate :
                self.bestAccurate = b
                self.bestPara.clear() #更新自己参数里的最佳参数
                for k,v in self.model.para.items():
                    self.bestPara[k] = v.copy()

            if i+1 == self.epoch:
                filename = "TrainingResult"
                with open(filename,'wb') as f:
                    pickle.dump(self.model,f)
                    print("成功保存模型")

            if i > 10:
                for q in self.funcParameter:
                    self.funcParameter[q]['learn_rate'] *= self.learn_rate_decay

        # 我觉得这里最后一轮出最佳的话就保存不了了啊
        self.model.params = self.bestPara

In [7]:
# 卷积层前向传播的过程
def convolution_spread_forward(train, convolution_entity, bias, para):
    pic_num = train.shape[0]
    channel = train.shape[1]
    height = train.shape[2]
    wide = train.shape[3]
    convolution_num = convolution_entity.shape[0]
    convolution_height = convolution_entity.shape[2]
    convolution_wide = convolution_entity.shape[3]
    step = para['step'] #这里代表着步长
    padding = para['pad'] # 填充部分的大小
    # 这里可以改成函数外提
    new_train = np.pad(train,((0,0),(0,0),(padding,padding),(padding,padding)))
    # 这里也可以函数外提
    loop_outside  = height + 2 * padding - convolution_height
    loop_outside  = loop_outside // step
    loop_outside  = loop_outside + 1
    loop_inside = wide + 2 * padding - convolution_wide
    loop_inside = loop_inside // step
    loop_inside = loop_inside + 1
    weight = np.zeros((pic_num, convolution_num, loop_outside, loop_inside))
    for i in range(loop_outside):
        for j in range(loop_inside):
            temp = []
            # 这个矩阵转换必须外提做函数
            temp = new_train[:,:,step*i:step*i+convolution_height,step*j:step*j+convolution_wide]
            temp = temp.reshape(pic_num,1,channel,convolution_height,convolution_wide)
            new_convolution = convolution_entity.reshape(1,convolution_num,channel,convolution_height,convolution_wide)
            # 这个也可以进行函数外提
            weight[:,:,i,j] = np.sum(temp * new_convolution,axis=(-3,-2,-1))
            weight[:,:,i,j] += bias
    # 这个也得封装
    package = (train,convolution_entity,bias,para)
    return weight,package

In [8]:
def convolution_spread_backward(derivation,pack):
    num = derivation.shape[0]
    convolution_num = derivation.shape[1]
    loop_outside = derivation.shape[2]
    loop_inside = derivation.shape[3]
    train = pack[0]
    height = train.shape[2]
    wide = train.shape[3]
    convolution = pack[1]
    derivation_convolution = np.zeros_like(convolution)
    channel =convolution.shape[1]
    convolution_height = convolution.shape[2]
    convolution_wide = convolution.shape[3]
    bias = pack[2]
    derivation_bias = np.zeros_like(bias)
    para = pack[3]
    padding = para['pad']
    step = para['step']
    # 这个可以封装成一个比较复杂的函数
    new_train = np.pad(train,((0,0),(0,0),(padding,padding),(padding,padding)))
    derivation_train = np.zeros_like(new_train)
    for i in range(loop_outside):
        for j in range(loop_inside):
            #都可以外提成函数
            temp = derivation[:,:,i,j]
            temp = temp.reshape((num,1,1,1,convolution_num))
            temp2 = convolution.transpose((1,2,3,0))
            temp2 = temp2.reshape((1,channel,convolution_height,convolution_wide,convolution_num))
            temp_sum = np.sum(temp*temp2,axis=-1)
            derivation_train[:,:,step*i:step*i+convolution_height,step*j:step*j+convolution_wide] +=temp_sum
            temp3 = derivation[:,:,i,j].T
            temp3 = temp3.reshape((convolution_num,1,1,1,num))
            temp4 = new_train[:,:,step*i:step*i+convolution_height,step*j:step*j+convolution_wide].transpose(1,2,3,0)
            # 外提做函数
            temp_sum = np.sum(temp3*temp4,axis=-1)
            derivation_convolution += temp_sum
            temp_sum = np.sum(derivation[:,:,i,j],axis=0)
            derivation_bias += temp_sum

    # 这个必须外提
    derivation_weight = []
    derivation_weight = derivation_train[:,:,padding:padding+height,padding:padding+wide]
    return derivation_weight,derivation_convolution,derivation_bias

In [9]:
def relu_spread_froward(Input):
    # 由于老师在课堂上强调sigmoid的梯度消失问题
    # 解决办法可以为替换激活函数
    # 使用relu作为激活函数
    temp = np.maximum(0,Input)
    return temp,Input

In [10]:
def relu_spread_backward(derivation,Input):
    # relu函数的反向传播函数
    temp = Input
    temp[temp>0]=1
    temp[temp<=0]=0
    derivation_train = derivation * Input
    return derivation_train

In [11]:
def pool_spread_forward(train,para):
    pool_height = para['pool_height'] #传入的时候也要按照这个格式命名
    pool_wide = para['pool_wide']
    step = para['step']
    train_num = train.shape[0]
    channel = train.shape[1]
    height = train.shape[2]
    wide = train.shape[3]
    loop_outside = height - pool_height
    loop_outside = loop_outside // step
    loop_outside = loop_outside + 1
    loop_inside = wide - pool_wide
    loop_inside = loop_inside // step
    loop_inside = loop_inside + 1
    ret = np.zeros(shape=(train_num,channel,loop_outside,loop_inside))
    for i in range(loop_outside):
        for j in range(loop_inside):
            temp = train[:,:,i*step:i*step+pool_height,j*step:j*step+pool_wide]
            ret[:,:,i,j]=np.max(temp,axis=(2,3))
    pack = (train,para)
    return ret,pack

In [12]:
def pool_spread_backward(derivation,pack):
    train = pack[0]
    derivation_train = np.zeros_like(train)
    train_num = train.shape[0]
    channel = train.shape[1]
    height = train.shape[2]
    wide = train.shape[3]
    para = pack[1]
    pool_height = para['pool_height']
    pool_wide = para['pool_wide']
    step = para['step']
    loop_outside = height - pool_height
    loop_outside = loop_outside // step
    loop_outside = loop_outside + 1
    loop_inside = wide - pool_wide
    loop_inside = loop_inside // step
    loop_inside = loop_inside + 1
    for i in range(loop_outside):
        for j in range(loop_inside):
            temp = train[:,:,i*step:i*step+pool_height,j*step:j*step+pool_wide]
            temp = temp.reshape(train_num,channel,-1)
            tempMax = np.argmax(temp,axis=-1)
            # 这个必须外提
            temp_max = np.unravel_index(tempMax,(pool_height,pool_wide))
            which = np.array(temp_max)
            for k in range(train_num):
                for l in range(channel):
                    # 这个都可以外提其实
                    temp2 = derivation[k][l][i][j]
                    index_3 = i*step+which[0][k][l]
                    index_4 = j*step+which[1][k][l]
                    derivation_train[k][l][index_3][index_4] += temp2

    return derivation_train

In [13]:
def full_connect_spread_forward(train,weight,bias):
    # 全连接层的前向传播
    index_0 = train.shape[0]
    temp = train.reshape(index_0,-1)
    temp = temp.dot(weight) + bias
    pack = (train,weight,bias)
    return temp,pack

In [14]:
def full_connect_spread_backward(derivation,pack):
    train = pack[0]
    index_0 = train.shape[0]
    weight = pack[1]
    weight_T = weight.T
    bias = pack[2]
    derivation_train = derivation.dot(weight_T)
    derivation_train = derivation_train.reshape(train.shape)
    temp = train.reshape(index_0,-1)
    temp_T = temp.T
    derivation_weight = temp_T.dot(derivation)
    derivation_bias = np.sum(derivation,axis=0)
    return derivation_train,derivation_weight,derivation_bias

In [15]:
def softmax(Input,Output):
    # softmax损失函数的计算
    data_num = Input.shape[0]
    cal_e = np.exp(Input)
    cal_sum = np.sum(cal_e,axis=1)
    cal_log = np.log(cal_sum)
    temp = Input[range(data_num),list(Output)]
    ret = np.mean(cal_log-temp)
    temp_sum = cal_sum.reshape(data_num,1)
    derivation_Input = cal_e / temp_sum
    # 必须外提
    derivation_Input[range(data_num),list(Output)] = derivation_Input[range(data_num),list(Output)]-1
    derivation_Input = derivation_Input / data_num
    return ret,derivation_Input

In [16]:
class MyLenet5:
    def __init__(self):
        # 输入的图像通道为1
        # 输入图像大小为32*32
        self.input_channel = 1
        self.input_height = 32
        self.input_wide = 32
        # 第一次卷积核有6个
        # 第二次卷积核有16个
        # 卷积核大小都是5*5
        self.convolution_num1 = 6
        self.convolution_num2 = 16
        self.convolution_height = 5
        self.convolution_wide =5
        # 全连接层的大小
        self.full_connect1 = 400
        self.full_connect2 = 120
        self.full_connect3 = 84
        # 输出类型个数
        self.output_size = 10
        # lenet5的参数
        # 卷积池化层的参数和偏置项
        # 全连接层的参数和偏置项
        self.para = {}
        # 正则项
        self.regularItem = 0.001
        self.datatype = np.float32
        # 生成时必要的标准差
        self.standard_deviation = 0.001

        #首先初始化第一个卷积层
        convolution1_para_weight = np.random.normal(
            loc=0.0,
            scale=self.standard_deviation,
            size=(self.convolution_num1,self.input_channel,self.convolution_height,self.convolution_wide)
        )
        convolution1_para_bias = np.zeros(self.convolution_num1)

        #接下来初始化第二个卷积层
        convolution2_para_weight = np.random.normal(
            loc = 0.0,
            scale = self.standard_deviation,
            size = (self.convolution_num2,self.convolution_num1,self.convolution_height,self.convolution_wide)
        )
        convolution2_para_bias = np.zeros(self.convolution_num2)

        #接下来初始化第一个全连接层
        full_connect1_para_weight = np.random.normal(
            loc = 0.0,
            scale = self.standard_deviation,
            size = (self.full_connect1,self.full_connect2)
        )
        full_connect1_para_bias = np.zeros(self.full_connect2)

        #接下来初始化第二个全连接层
        full_connect2_para_weight = np.random.normal(
            loc=0.0,
            scale = self.standard_deviation,
            size = (self.full_connect2,self.full_connect3)
        )
        full_connect2_para_bias = np.zeros(self.full_connect3)

        #接下来初始化第三个全连接层
        full_connect3_para_weight = np.random.normal(
            loc=0.0,
            scale = self.standard_deviation,
            size = (self.full_connect3,self.output_size)
        )
        full_connect3_para_bias = np.zeros(self.output_size)

        self.para['convolution1_para_weight'] = convolution1_para_weight
        self.para['convolution1_para_bias'] = convolution1_para_bias
        self.para['convolution2_para_weight'] = convolution2_para_weight
        self.para['convolution2_para_bias'] = convolution2_para_bias
        self.para['full_connect1_para_weight'] = full_connect1_para_weight
        self.para['full_connect1_para_bias'] = full_connect1_para_bias
        self.para['full_connect2_para_weight'] = full_connect2_para_weight
        self.para['full_connect2_para_bias'] = full_connect2_para_bias
        self.para['full_connect3_para_weight'] = full_connect3_para_weight
        self.para['full_connect3_para_bias'] = full_connect3_para_bias

        self.changeType()

    def changeType(self):
        for k,v in self.para.items():
            self.para[k] = v.astype(np.float32)

    # 这个在不同的层那里要改
    def spread(self,train,label):
        # 卷积层步长为1，没有padding
        convolution_para = {}
        convolution_para.clear()
        convolution_para['step']=1
        convolution_para['pad']=0

        # 池化层步长为2 范围就是【2,2】
        pooling_para={}
        pooling_para.clear()
        pooling_para['step']=2
        pooling_para['pool_height']=2
        pooling_para['pool_wide']=2

        # 拿出所有需要的权重
        c1w = self.para['convolution1_para_weight']
        temp1 = np.sum(c1w*c1w)
        c1b = self.para['convolution1_para_bias']
        c2w = self.para['convolution2_para_weight']
        temp2 = np.sum(c2w*c2w)
        c2b = self.para['convolution2_para_bias']
        fc3w = self.para['full_connect1_para_weight']
        temp3 = np.sum(fc3w*fc3w)
        fc3b = self.para['full_connect1_para_bias']
        fc4w = self.para['full_connect2_para_weight']
        temp4 = np.sum(fc4w*fc4w)
        fc4b = self.para['full_connect2_para_bias']
        fc5w = self.para['full_connect3_para_weight']
        temp5 = np.sum(fc5w*fc5w)
        fc5b = self.para['full_connect3_para_bias']

        cal_temp = temp1 + temp2 +temp3 +temp4 +temp5
        cal_temp = (self.regularItem*cal_temp)/2
        #开始前向传播

        # 第一次卷积层+relu+第一层池化层
        c1_res,c1_pack = convolution_spread_forward(train,c1w,c1b,convolution_para)

        c1_relu_res,c1_relu_pack = relu_spread_froward(c1_res)

        c1_pool_res,c1_pool_pack = pool_spread_forward(c1_relu_res,pooling_para)

        # 第二次卷积层+relu+第二层池化层
        c2_res,c2_pack = convolution_spread_forward(c1_pool_res,c2w,c2b,convolution_para)

        c2_relu_res,c2_relu_pack = relu_spread_froward(c2_res)

        c2_pool_res,c2_pool_pack = pool_spread_forward(c2_relu_res,pooling_para)

        # 第一层全连接层+relu层
        c3_res,c3_pack = full_connect_spread_forward(c2_pool_res,fc3w,fc3b)

        c3_relu_res,c3_relu_pack = relu_spread_froward(c3_res)

        # 第二层全连接层
        c4_res,c4_pack = full_connect_spread_forward(c3_relu_res,fc4w,fc4b)

        c4_relu_res,c4_relu_pack = relu_spread_froward(c4_res)

        # 输出层
        c5_out,c5_pack = full_connect_spread_forward(c4_relu_res,fc5w,fc5b)

        # 开始反向传播

        # 声明校正值为0.0
        correction1 = 0.0
        correction2 = 0.0
        correction3 = 0.0
        correction4 = 0.0
        correction5 = 0.0
        correction1_2 = 0.0
        correction2_2 = 0.0
        correction3_2 = 0.0
        correction4_2 = 0.0
        correction5_2 = 0.0


        l,derivation_train = softmax(c5_out,label)

        l = l + cal_temp

        derivation_train,correction5,correction5_2 = full_connect_spread_backward(derivation_train,c5_pack)
        derivation_train = relu_spread_backward(derivation_train,c4_relu_pack)
        correction5 =correction5+self.regularItem*self.para['full_connect3_para_weight']

        derivation_train,correction4,correction4_2 = full_connect_spread_backward(derivation_train,c4_pack)
        derivation_train = relu_spread_backward(derivation_train,c3_relu_pack)
        correction4 =correction4+self.regularItem*self.para['full_connect2_para_weight']

        derivation_train,correction3,correction3_2 = full_connect_spread_backward(derivation_train,c3_pack)
        derivation_train = pool_spread_backward(derivation_train,c2_pool_pack)
        derivation_train = relu_spread_backward(derivation_train,c2_relu_pack)
        correction3 =correction3+self.regularItem*self.para['full_connect1_para_weight']

        derivation_train,correction2,correction2_2 = convolution_spread_backward(derivation_train,c2_pack)
        derivation_train = pool_spread_backward(derivation_train,c1_pool_pack)
        derivation_train = relu_spread_backward(derivation_train,c1_relu_pack)
        correction2 =correction2+self.regularItem*self.para['convolution2_para_weight']

        derivation_train,correction1,correction1_2 = convolution_spread_backward(derivation_train,c1_pack)
        correction1 =correction1+self.regularItem*self.para['convolution1_para_weight']

        corr = {}
        corr['convolution1_para_weight'] = correction1
        corr['convolution1_para_bias'] = correction1_2

        corr['convolution2_para_weight'] = correction2
        corr['convolution2_para_bias'] = correction2_2

        corr['full_connect1_para_weight'] = correction3
        corr['full_connect1_para_bias'] = correction3_2

        corr['full_connect2_para_weight'] = correction4
        corr['full_connect2_para_bias'] = correction4_2

        corr['full_connect3_para_weight'] = correction5
        corr['full_connect3_para_bias'] = correction5_2

        return l,corr

    def spread2(self,train):
        # 卷积层步长为1，没有padding
        convolution_para = {}
        convolution_para.clear()
        convolution_para['step']=1
        convolution_para['pad']=0

        # 池化层步长为2 范围就是【2,2】
        pooling_para={}
        pooling_para.clear()
        pooling_para['step']=2
        pooling_para['pool_height']=2
        pooling_para['pool_wide']=2

        # 拿出所有需要的权重
        c1w = self.para['convolution1_para_weight']
        temp1 = np.sum(c1w*c1w)
        c1b = self.para['convolution1_para_bias']
        c2w = self.para['convolution2_para_weight']
        temp2 = np.sum(c2w*c2w)
        c2b = self.para['convolution2_para_bias']
        fc3w = self.para['full_connect1_para_weight']
        temp3 = np.sum(fc3w*fc3w)
        fc3b = self.para['full_connect1_para_bias']
        fc4w = self.para['full_connect2_para_weight']
        temp4 = np.sum(fc4w*fc4w)
        fc4b = self.para['full_connect2_para_bias']
        fc5w = self.para['full_connect3_para_weight']
        temp5 = np.sum(fc5w*fc5w)
        fc5b = self.para['full_connect3_para_bias']

        cal_temp = temp1 + temp2 +temp3 +temp4 +temp5
        cal_temp = (self.regularItem*cal_temp)/2
        #开始前向传播

        # 第一次卷积层+relu+第一层池化层
        c1_res,c1_pack = convolution_spread_forward(train,c1w,c1b,convolution_para)

        c1_relu_res,c1_relu_pack = relu_spread_froward(c1_res)

        c1_pool_res,c1_pool_pack = pool_spread_forward(c1_relu_res,pooling_para)

        # 第二次卷积层+relu+第二层池化层
        c2_res,c2_pack = convolution_spread_forward(c1_pool_res,c2w,c2b,convolution_para)

        c2_relu_res,c2_relu_pack = relu_spread_froward(c2_res)

        c2_pool_res,c2_pool_pack = pool_spread_forward(c2_relu_res,pooling_para)

        # 第一层全连接层+relu层
        c3_res,c3_pack = full_connect_spread_forward(c2_pool_res,fc3w,fc3b)

        c3_relu_res,c3_relu_pack = relu_spread_froward(c3_res)

        # 第二层全连接层
        c4_res,c4_pack = full_connect_spread_forward(c3_relu_res,fc4w,fc4b)

        c4_relu_res,c4_relu_pack = relu_spread_froward(c4_res)

        # 输出层
        c5_out,c5_pack = full_connect_spread_forward(c4_relu_res,fc5w,fc5b)

        return c5_out

In [17]:
# 调用接口，完成数据加载并打包
trainImages,trainLabels,proofImages,proofLabels,testImages,testLabels = load_minist_data()
data = {}
data['train1']=trainImages
data['train2']=proofImages
data['label1']=trainLabels
data['label2']=proofLabels
data['test_image']=testImages
data['test_label']=testLabels
print(data['train1'].shape)
print(data['label1'].shape)
print(data['train2'].shape)
print(data['label2'].shape)


TrainModel = MyLenet5()
myParameter = {}
myParameter['learn_rate']=1e-3
# def __init__(self, Lenet5, data, epoch, func, parameter, decrease, batchSize, exp):
myTrain = Train(Lenet5=TrainModel,data=data,epoch=10,func='adam',parameter=myParameter,decrease=0.75,batchSize=200,exp=1)
myTrain.myTrain()


魔数:2051, 图片数量: 60000张, 图片大小: 28*28
本次解析的矩阵格式为[60000,28,28]
魔数:2049, 图片数量: 60000张, 图片大小: 1*1
本次解析的矩阵格式为[60000,1,1]
魔数:2051, 图片数量: 10000张, 图片大小: 28*28
本次解析的矩阵格式为[10000,28,28]
魔数:2049, 图片数量: 10000张, 图片大小: 1*1
本次解析的矩阵格式为[10000,1,1]
加载完毕
(59500, 1, 32, 32)
(59500,)
(500, 1, 32, 32)
(500,)
第 1 轮第 1 个batch中loss为 2.302616
第 1 轮第 2 个batch中loss为 2.302497
第 1 轮第 3 个batch中loss为 2.302553
第 1 轮第 4 个batch中loss为 2.302673
第 1 轮第 5 个batch中loss为 2.302406
第 1 轮第 6 个batch中loss为 2.301991
第 1 轮第 7 个batch中loss为 2.302519
第 1 轮第 8 个batch中loss为 2.302695
第 1 轮第 9 个batch中loss为 2.302377
第 1 轮第 10 个batch中loss为 2.302433
第 1 轮第 11 个batch中loss为 2.302578
第 1 轮第 12 个batch中loss为 2.302404
第 1 轮第 13 个batch中loss为 2.302346
第 1 轮第 14 个batch中loss为 2.302132
第 1 轮第 15 个batch中loss为 2.302087
第 1 轮第 16 个batch中loss为 2.302847
第 1 轮第 17 个batch中loss为 2.301568
第 1 轮第 18 个batch中loss为 2.302360
第 1 轮第 19 个batch中loss为 2.302534
第 1 轮第 20 个batch中loss为 2.302123
第 1 轮第 21 个batch中loss为 2.301792
第 1 轮第 22 个batch中loss为 2.301903
第 1 轮第 23 个batch中los

In [21]:
print(
    "Test accuracy:",
    myTrain.calAccuracy2(data['test_image'], data['test_label'],100)
)

Test accuracy: 0.9756
