# 导入使用的库
导入实现过程中所必要的库，如tensorflow、keras、numpy等，并定义几分类任务

In [None]:
import os
import cv2
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from keras import Model, Sequential
from keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Concatenate, GlobalAveragePooling2D, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from keras.datasets import cifar10
class_number = 10    # number of target classes; used as the width of every model's softmax output layer

# LeNet-5
LeNet-5体系结构
LeNet-5是一种古老但经典的卷积神经网络结构，由Yann Andre LeCun、Leon Bottou、Yoshua Bengio和Patrick Haffner于1998年开发。

第一次开发时，它是为手写MNIST数字识别而设计的，并成为AlexNet和VGG等未来架构的基础。

网络先经过两组“卷积层 + 最大池化层”，之后是一个展平层（Flatten），然后是3个全连接层（Dense）。


In [None]:
class LeNet5(Model):
    """LeNet-5 style classifier.

    Two (5x5 convolution -> 2x2 max-pool) stages are followed by a flatten
    step and three dense layers. Hidden layers use sigmoid activations and
    the output layer is a softmax over ``class_number`` classes.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.c1 = Conv2D(6, (5, 5), activation='sigmoid')
        self.p1 = MaxPool2D((2, 2), strides=2)
        self.c2 = Conv2D(16, (5, 5), activation='sigmoid')
        self.p2 = MaxPool2D((2, 2), strides=2)

        # Dense classifier head: 120 -> 84 -> class_number.
        self.flatten = Flatten()
        self.f1 = Dense(120, activation='sigmoid')
        self.f2 = Dense(84, activation='sigmoid')
        self.f3 = Dense(class_number, activation='softmax')

    def call(self, x):
        """Run the forward pass and return the per-class softmax output."""
        pipeline = (
            self.c1, self.p1,
            self.c2, self.p2,
            self.flatten,
            self.f1, self.f2, self.f3,
        )
        for layer in pipeline:
            x = layer(x)
        return x


# AlexNet-8
* **前言:**
在2010年的ImageNet LSVRC-2010上，AlexNet在测试集上的top-1和top-5错误率为37.5%和17.0%。在ImageNet LSVRC-2012的比赛中，取得了top-5错误率为15.3%的成绩。**top-5 错误率：即对一张图像预测5个类别，只要有一个和人工标注类别相同就算对，否则算错。同理，top-1错误率是对一张图像只预测1个类别。**
* **ImageNet数据集:**
训练集120万张图片，验证集5万张图片，测试集15万张图片，1000个类别，并且有多种不同的分辨率。（如324*500，422*396等）
* **网络结构:**
AlexNet约有6000万个参数和650,000个神经元，包含5个卷积层，依次为（11，11）卷积层步长为4、（5，5）卷积层、3个（3,3）卷积层零填充，其中第1、2、5卷积层后接有max-pooling层步长为2，3个全连接层神经元个数依次4096、4096、1000，为了减少过拟合，在全连接层使用了dropout。具体结构如下图：**（注：图为GPU并行计算，实际通道数、神经元个数为上下两个相加。）**

![AlexNet网络结构](./model_image/AlexNet.png)

[Dropout论文链接**Srivastava, Nitish, et al. "Dropout: a simple way to prevent neural networks from overfitting." Journal of Machine Learning Research 15.1 (2014): 1929-1958.**](https://dl.acm.org/doi/abs/10.5555/2627435.2670313)

* 激活函数使用relu，归一化采用局部响应归一化（LRN），当时还没有BN（BN论文15年发表的，[**Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network training by reducing internal covariate shift." arXiv preprint arXiv:1502.03167 (2015)**](https://arxiv.org/abs/1502.03167)）
* 池化采用Overlapping Pooling（这个不怎么了解，用的比较多的是Max_Pool、Average_Pool）
* 缓解过拟合：数据增强（图像镜像反射和随机剪裁）、dropout

以上描述参考[知乎链接](https://zhuanlan.zhihu.com/p/42914388)

[AlexNet论文链接**ImageNet Classification with Deep Convolutional Neural Networks**](https://papers.nips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf)

In [None]:
class AlexNet8(Model):
    """AlexNet-style classifier with 5 convolutional and 3 dense layers.

    Differences from the original paper, as implemented here:
      * the first convolution uses stride 2 (the paper uses stride 4 on
        224x224x3 inputs; this notebook feeds 112x112 images);
      * BatchNormalization is used where the paper uses LRN;
      * the dense head is 2048 -> 2048 -> ``class_number`` instead of
        4096 -> 4096 -> 1000.

    Every convolution is followed by a ReLU. conv3 and conv4 carry it as
    their ``activation`` argument; without it the conv3 -> conv4 -> conv5
    stack would be a single linear map until the final BN/ReLU.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 11x11 convolution, stride 2 (see class docstring), BN, ReLU, 2x2 pool.
        self.c1 = Conv2D(filters=96, kernel_size=(11, 11), strides=2)
        self.b1 = BatchNormalization()
        self.a1 = Activation('relu')
        self.p1 = MaxPool2D(pool_size=(2, 2))

        # Stage 2: 5x5 convolution, BN, ReLU, 2x2 pool.
        self.c2 = Conv2D(filters=256, kernel_size=(5, 5))
        self.b2 = BatchNormalization()
        self.a2 = Activation('relu')
        self.p2 = MaxPool2D(pool_size=(2, 2))

        # Stage 3: three 3x3 'same' convolutions. c3 and c4 apply ReLU directly;
        # c5 gets BN + ReLU from b3/a3 below. Adding the activations introduces
        # no new weights, so the set of trainable variables is unchanged.
        self.c3 = Conv2D(filters=384, kernel_size=(3, 3), padding='same', activation='relu')
        self.c4 = Conv2D(filters=384, kernel_size=(3, 3), padding='same', activation='relu')
        self.c5 = Conv2D(filters=256, kernel_size=(3, 3), padding='same')
        self.b3 = BatchNormalization()
        self.a3 = Activation('relu')
        self.p3 = MaxPool2D(pool_size=(2, 2))

        # Dense head with dropout (0.5) after each hidden dense layer.
        self.flatten = Flatten()
        self.f1 = Dense(2048, activation='relu')
        self.d1 = Dropout(0.5)
        self.f2 = Dense(2048, activation='relu')
        self.d2 = Dropout(0.5)
        self.f3 = Dense(class_number, activation='softmax')

    def call(self, x):
        """Run the forward pass and return the per-class softmax output."""
        x = self.c1(x)
        x = self.b1(x)
        x = self.a1(x)
        x = self.p1(x)

        x = self.c2(x)
        x = self.b2(x)
        x = self.a2(x)
        x = self.p2(x)

        x = self.c3(x)
        x = self.c4(x)
        x = self.c5(x)
        x = self.b3(x)
        x = self.a3(x)
        x = self.p3(x)

        x = self.flatten(x)
        x = self.f1(x)
        x = self.d1(x)
        x = self.f2(x)
        x = self.d2(x)
        y = self.f3(x)

        return y
        

# GoogLeNet
* GoogLeNet在2014年的ImageNet竞赛中夺得了冠军。VGG是当年的亚军。两个模型相比，VGG模型参数量较大，且参数大部分（一半左右）集中在第一个全连接层。GoogLeNet参数主要分布在9个inception块和linear层中，模型没有VGG那么臃肿。
* GoogLeNet网络总共22层（包括池化层的话是27层），其中包含9个Inception块，最后采用全局平均池化GlobalAveragePooling2D减少模型参数。
![GoogLeNet模型结构](./model_image/GoogLeNet.png)
* **Inception module:**
在Inception module块中分为4个部分，第1部分是经过（1，1）卷积层得到输出feature_map1；第2部分是先通过（1，1）卷积层降维，再经过（3，3）卷积层得到输出feature_map2；第3部分是先通过（1，1）卷积层降维，再经过（5，5）卷积层得到输出feature_map3；第4部分是先通过（3，3）池化层，再经过（1，1）卷积层升维得到输出feature_map4。最后将4个feature_map按通道数维度进行叠加得到最终整个Inception module的输出。![Inception module结构图](./model_image/google%20inception.png)
* **(1，1)卷积层的作用：**
    * 升维/降维；
    * 跨通道信息交融；
    * 减少参数量；
    * 增加模型深度，提升模型的非线性表达能力。
* **全局平均池化GlobalAveragePooling2D：**
在输入的每一个通道上求出一个平均值，作为最终输出的一维向量上的值。如输入（7，7，1000）就在7*7中求出平均值作为最终（1*1000）向量的值，输出向量为1维，元素个数1000。**作用：**减少参数量，更便于迁移学习。
* **辅助训练器：**
如下图，图中框出的两个softmax部分即为模型训练时的辅助训练器，在最终的损失中，两个辅助分类器的损失提供30%， loss(最终) = loss(最后一个softmax部分的损失) + 0.3*loss1(辅助分类器1的损失) + 0.3*loss2(辅助分类器2的损失)。这个是在模型训练过程中用的，有利于缓解模型加深时梯度消失，在模型训练完后，这两个辅助训练器会被删除掉。也就是在预测过程时模型是没有这两部分的。
![辅助训练器](./model_image/GoogLe_training.jpg)

[GoogLeNet论文链接**Szegedy, Christian, et al. "Going deeper with convolutions." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2015.**](https://arxiv.org/abs/1409.4842)

In [None]:
class Inception(Model):
    """Inception module: four parallel branches concatenated on the channel axis.

    Branches (each convolution is followed by BatchNormalization and ReLU):
      1. 1x1 convolution
      2. 1x1 convolution -> 3x3 'same' convolution
      3. 1x1 convolution -> 5x5 'same' convolution
      4. 3x3 'same' max-pool (stride 1) -> 1x1 convolution

    Args:
        filters_list: six filter counts, in this order:
            [0] branch-1 1x1 conv, [1] branch-2 1x1 conv, [2] branch-2 3x3 conv,
            [3] branch-3 1x1 conv, [4] branch-3 5x5 conv, [5] branch-4 1x1 conv.

    Concatenate usage, for reference (``axis=0`` is the first dimension,
    ``axis=1`` the second, and so on; the default ``-1`` is the last one):

        >>> x = np.arange(20).reshape(2, 2, 5)
        >>> print(x)
        [[[ 0  1  2  3  4]
          [ 5  6  7  8  9]]
         [[10 11 12 13 14]
          [15 16 17 18 19]]]
        >>> y = np.arange(20, 30).reshape(2, 1, 5)
        >>> print(y)
        [[[20 21 22 23 24]]
         [[25 26 27 28 29]]]
        >>> tf.keras.layers.Concatenate(axis=1)([x, y])
        <tf.Tensor: shape=(2, 3, 5), dtype=int64, numpy=
        array([[[ 0,  1,  2,  3,  4],
                [ 5,  6,  7,  8,  9],
                [20, 21, 22, 23, 24]],
               [[10, 11, 12, 13, 14],
                [15, 16, 17, 18, 19],
                [25, 26, 27, 28, 29]]])>
    """

    def __init__(self, filters_list):
        super().__init__()
        # Branch 1: 1x1 convolution.
        self.conv1 = Conv2D(filters=filters_list[0], kernel_size=(1, 1), strides=1)
        self.BN1 = BatchNormalization()
        self.relu1 = Activation('relu')
        # Branch 2: 1x1 -> 3x3 convolution.
        self.conv2_1 = Conv2D(filters=filters_list[1], kernel_size=(1, 1), strides=1)
        self.BN2_1 = BatchNormalization()
        self.relu2_1 = Activation('relu')
        self.conv2_2 = Conv2D(filters=filters_list[2], kernel_size=(3, 3), strides=1, padding='same')
        self.BN2_2 = BatchNormalization()
        self.relu2_2 = Activation('relu')
        # Branch 3: 1x1 -> 5x5 convolution.
        self.conv3_1 = Conv2D(filters=filters_list[3], kernel_size=(1, 1), strides=1)
        self.BN3_1 = BatchNormalization()
        self.relu3_1 = Activation('relu')
        self.conv3_2 = Conv2D(filters=filters_list[4], kernel_size=(5, 5), strides=1, padding='same')
        self.BN3_2 = BatchNormalization()
        self.relu3_2 = Activation('relu')
        # Branch 4: max-pool -> 1x1 convolution.
        self.pool = MaxPool2D((3, 3), strides=1, padding='same')
        self.conv4_1 = Conv2D(filters=filters_list[5], kernel_size=(1, 1), strides=1)
        self.BN4_1 = BatchNormalization()
        self.relu4_1 = Activation('relu')
        # Depth concatenation. Built once here rather than on every call();
        # it is created last so the construction order of the weighted
        # layers above is unchanged.
        self.concat = Concatenate(axis=-1)

    def call(self, inputs):
        """Apply the four branches to ``inputs`` and concatenate their outputs."""
        x = inputs
        # Branch 1: 1x1
        out1 = self.conv1(x)
        out1 = self.BN1(out1)
        out1 = self.relu1(out1)
        # Branch 2: 1x1 -> 3x3
        out2 = self.conv2_1(x)
        out2 = self.BN2_1(out2)
        out2 = self.relu2_1(out2)
        out2 = self.conv2_2(out2)
        out2 = self.BN2_2(out2)
        out2 = self.relu2_2(out2)
        # Branch 3: 1x1 -> 5x5
        out3 = self.conv3_1(x)
        out3 = self.BN3_1(out3)
        out3 = self.relu3_1(out3)
        out3 = self.conv3_2(out3)
        out3 = self.BN3_2(out3)
        out3 = self.relu3_2(out3)
        # Branch 4: pool -> 1x1
        out4 = self.pool(x)
        out4 = self.conv4_1(out4)
        out4 = self.BN4_1(out4)
        out4 = self.relu4_1(out4)
        # All branches keep the spatial size ('same' padding / 1x1 kernels at
        # stride 1), so they can be stacked on the last (channel) axis.
        output = self.concat([out1, out2, out3, out4])

        return output


class GoogLeNet(Model):
    """GoogLeNet-style classifier built from nine ``Inception`` modules.

    The two auxiliary classifiers of the original paper (the softmax
    branches after inception_4a and inception_4d) are not implemented.

    The stem is conv 7x7/2 -> max-pool -> BN -> ReLU -> conv 1x1 (ReLU)
    -> conv 3x3 -> BN -> ReLU -> max-pool. The 1x1 convolution carries its
    own ReLU; without it the 1x1 and 3x3 convolutions would compose into a
    single linear map and the 1x1 layer would add no modelling capacity.
    No padding argument is passed to the stem convolutions or the pooling
    layers, so the Keras default applies to them.
    """

    def __init__(self):
        super().__init__()
        # Stem.
        self.conv1 = Conv2D(filters=64, kernel_size=(7, 7), strides=2)
        self.pool1 = MaxPool2D((3, 3), strides=2)
        self.BN1 = BatchNormalization()
        self.relu1 = Activation('relu')
        # ReLU added here (see class docstring); it introduces no new weights.
        self.conv2 = Conv2D(filters=64, kernel_size=(1, 1), strides=1, activation='relu')
        self.conv3 = Conv2D(filters=192, kernel_size=(3, 3), strides=1)
        self.BN2 = BatchNormalization()
        self.relu2 = Activation('relu')
        self.pool2 = MaxPool2D((3, 3), strides=2)
        # Inception stage 3.
        self.inception_3a = Inception([64, 96, 128, 16, 32, 32])
        self.inception_3b = Inception([128, 128, 192, 32, 96, 64])
        self.pool3 = MaxPool2D((3, 3), strides=2)
        # Inception stage 4.
        self.inception_4a = Inception([192, 96, 208, 16, 48, 64])
        self.inception_4b = Inception([160, 112, 224, 24, 64, 64])
        self.inception_4c = Inception([128, 128, 256, 24, 64, 64])
        self.inception_4d = Inception([112, 144, 288, 32, 64, 64])
        self.inception_4e = Inception([256, 160, 320, 32, 128, 128])
        self.pool4 = MaxPool2D((3, 3), strides=2)
        # Inception stage 5.
        self.inception_5a = Inception([256, 160, 320, 32, 128, 128])
        self.inception_5b = Inception([384, 192, 384, 48, 128, 128])
        # Head: global average pool -> dropout -> 1000-unit dense -> softmax.
        self.avg_pool = GlobalAveragePooling2D()
        self.dropout = Dropout(0.4)
        self.linear = Dense(1000, activation='relu')
        self.FC = Dense(class_number, activation='softmax')

    def call(self, inputs):
        """Run the forward pass and return the per-class softmax output."""
        x = self.conv1(inputs)
        x = self.pool1(x)
        x = self.BN1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.BN2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        x = self.inception_3a(x)
        x = self.inception_3b(x)
        x = self.pool3(x)
        x = self.inception_4a(x)
        x = self.inception_4b(x)
        x = self.inception_4c(x)
        x = self.inception_4d(x)
        x = self.inception_4e(x)
        x = self.pool4(x)
        x = self.inception_5a(x)
        x = self.inception_5b(x)
        x = self.avg_pool(x)
        x = self.dropout(x)
        x = self.linear(x)
        y = self.FC(x)

        return y



# VGG16、VGG19

* **VGG原理:**
VGG采用多个连续的3*3卷积核来代替较大的卷积核（如5*5、7*7，11*11等）。在具有相同的感受野的条件下，采用小卷积核堆叠的效果是要优于较大的卷积核的，多层非线性层可以增加网络深度来保证学习更复杂的模式，同时在图片尺寸较大时，小卷积核堆叠的参数是要比大卷积核要小的，计算开销也小于大卷积核。在感受野相同的情况下，使用2个（3*3）卷积核代替一个（5*5）卷积核，使用3个（3*3）卷积核代替一个（7*7）卷积核，依次类推。

* **VGG模型架构:**
VGG主流网络结构有VGG16、VGG19。在VGG16和VGG19中都是由5个卷积块组成，其中第1块采用2个64通道的卷积层组成，第2块是由2个128通道的卷积层组成，VGG16和VGG19前面两个卷积块是一样的，主要不同在后3个卷积块，VGG19在VGG16的基础上增加了1层相同的卷积层。如VGG16第3、4、5块分别由3个256卷积层、3个512卷积层、3个512卷积层组成，而VGG19中将所有的3层都改为了4层。具体网络结构如下图所示。

![**VGG网络结构**](./model_image/VGG.png)

**[论文链接:Simonyan, Karen, and Andrew Zisserman. "Very deep convolutional networks for large-scale image recognition." arXiv preprint arXiv:1409.1556 (2014).](https://arxiv.org/abs/1409.1556)**

In [None]:
class VGG16(Model):
    """VGG16-style classifier with BatchNormalization and dropout.

    Five convolutional blocks of 3x3 'same' convolutions, each convolution
    followed by BN and ReLU, each block closed by a 2x2/2 max-pool and
    Dropout(0.2). The dense head is 4096 -> 1024 -> ``class_number``
    (the paper uses 4096 -> 4096 -> 1000 for its 1000-class task).

    Layers are stored under the attribute names conv1..conv13, bn1..bn13,
    relu1..relu13, max_pool1..max_pool5 and dropout1..dropout5, and are
    created in forward order.
    """

    # (filters, number of convolutions) for each of the five blocks.
    _CONV_BLOCKS = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    def __init__(self):
        super().__init__()
        conv_id = 0
        for block_id, (filters, depth) in enumerate(self._CONV_BLOCKS, start=1):
            for _unused in range(depth):
                conv_id += 1
                setattr(self, 'conv{}'.format(conv_id),
                        Conv2D(filters=filters, kernel_size=(3, 3), padding='same'))
                setattr(self, 'bn{}'.format(conv_id), BatchNormalization())
                setattr(self, 'relu{}'.format(conv_id), Activation('relu'))
            setattr(self, 'max_pool{}'.format(block_id), MaxPool2D(pool_size=(2, 2), strides=2))
            setattr(self, 'dropout{}'.format(block_id), Dropout(0.2))

        # Dense head.
        self.flatten = Flatten()
        self.fc1 = Dense(4096, activation='relu')
        self.d1 = Dropout(0.2)
        self.fc2 = Dense(1024, activation='relu')
        self.d2 = Dropout(0.2)
        self.FC = Dense(class_number, activation='softmax')

    def call(self, x):
        """Run the forward pass and return the per-class softmax output."""
        conv_id = 0
        for block_id, (_filters, depth) in enumerate(self._CONV_BLOCKS, start=1):
            for _unused in range(depth):
                conv_id += 1
                for prefix in ('conv', 'bn', 'relu'):
                    x = getattr(self, '{}{}'.format(prefix, conv_id))(x)
            x = getattr(self, 'max_pool{}'.format(block_id))(x)
            x = getattr(self, 'dropout{}'.format(block_id))(x)

        for layer in (self.flatten, self.fc1, self.d1, self.fc2, self.d2, self.FC):
            x = layer(x)
        return x
      

class VGG19(Model):
    """VGG19-style classifier with BatchNormalization and dropout.

    Same layout as VGG16 in this file, with one extra 3x3 convolution
    (plus BN and ReLU) at the end of blocks 3, 4 and 5. Each block is
    closed by a 2x2/2 max-pool and Dropout(0.2). The dense head is
    4096 -> 1024 -> ``class_number`` (the paper uses 4096 -> 4096 -> 1000).

    The extra convolutions are stored as conv3_4, conv4_4 and conv5_4
    (with matching bn*/relu* names); all other layers use the VGG16
    numbering conv1..conv13.
    """

    # (filters, attribute-name suffixes of the convolutions) per block, in forward order.
    _CONV_BLOCKS = (
        (64, ('1', '2')),
        (128, ('3', '4')),
        (256, ('5', '6', '7', '3_4')),
        (512, ('8', '9', '10', '4_4')),
        (512, ('11', '12', '13', '5_4')),
    )

    def __init__(self):
        super().__init__()
        for block_id, (filters, suffixes) in enumerate(self._CONV_BLOCKS, start=1):
            for suffix in suffixes:
                setattr(self, 'conv' + suffix,
                        Conv2D(filters=filters, kernel_size=(3, 3), padding='same'))
                setattr(self, 'bn' + suffix, BatchNormalization())
                setattr(self, 'relu' + suffix, Activation('relu'))
            setattr(self, 'max_pool{}'.format(block_id), MaxPool2D(pool_size=(2, 2), strides=2))
            setattr(self, 'dropout{}'.format(block_id), Dropout(0.2))

        # Dense head.
        self.flatten = Flatten()
        self.fc1 = Dense(4096, activation='relu')
        self.d1 = Dropout(0.2)
        self.fc2 = Dense(1024, activation='relu')
        self.d2 = Dropout(0.2)
        self.FC = Dense(class_number, activation='softmax')

    def call(self, x):
        """Run the forward pass and return the per-class softmax output."""
        for block_id, (_filters, suffixes) in enumerate(self._CONV_BLOCKS, start=1):
            for suffix in suffixes:
                for prefix in ('conv', 'bn', 'relu'):
                    x = getattr(self, prefix + suffix)(x)
            x = getattr(self, 'max_pool{}'.format(block_id))(x)
            x = getattr(self, 'dropout{}'.format(block_id))(x)

        for layer in (self.flatten, self.fc1, self.d1, self.fc2, self.d2, self.FC):
            x = layer(x)
        return x

# ResNet 
### 残差网络(Res18、Res34、Res50、Res101、Res152)
* **前景:**
深层网络通过卷积块的简单地堆叠，要么出现梯度消失要么出现梯度爆炸的问题，这是由梯度的累乘所导致。通过简单堆叠得到的深层网络会出现模型退化现象，训练效果和测试效果均下降，这个并不是过拟合现象（训练效果很好，测试效果很差，模型泛化性能差）。
* **恒等映射:**
作者提出当构造一个深层网络时，考虑一个训练好的浅层网络，在该网络的后面添加一些卷积层构成深层的网络，构造出来的网络前面层的参数由训练好的浅层网络直接拷贝过来，那么后面添加的层只需要做一个恒等映射（identity mapping）的操作，将输入的X映射到X即可，那么这个新构造出来的网络的效果不应该比原来的浅层网络要差。但是当时的训练器或优化器并做不到这样的工作，或者说无法在可行的时间内训练得到一个这样的恒等映射。
* **残差学习(residual_learning)、残差块:**
目标要训练得到的函数为H(x)，残差F(x) = H(x)-x。模型并不是直接去拟合H(x),而是去拟合F(x)，后面输入x通过一个shortcut连接与F(x)作相加操作，就得到我们预期想得到的函数H(x)。作者把这个残差学习设置为一个残差块，而最终模型由多个残差块组合而成。Res18、Res34、Res50、Res101、Res152中都有4个残差块，但是18、34和50、101、152的残差块略有不同。

![Residual Learning残差块结构图](./model_image/residual%20learning.png)

* **瓶颈设计(bottleneck design):**
在18、34中用的残差块是由两个(3*3)卷积层中间有一个relu激活函数层构成，比较Res18和Res34的运算量可知，Res34的是比Res18的要多1倍的，在构造更深层网络时，运算量这样成倍增加是比较不利的，时间开销将会大大的增加。所以作者作了个bottleneck design调整了深层的残差块，Res50、Res101、Res152中的残差块依次由(1，1)卷积层、relu层、(3，3)卷积层、relu层、(1，1)卷积层构成。(1，1)卷积层在空间维度上不做任何操作，主要在通道维度上做调整。第一个(1，1)卷积层使得输入的通道数降下来，再输送给(3，3)卷积层提取特征，达到减少残差块里的运算的效果，后一个(1，1)卷积层将(3，3)卷积层的输出映射到更高通道数的维度上，达到加深模型可以学到更多内容的效果(加深模型，模型学到的东西相对增加了，那么对应的通道数也可以设置得更大)。shortcut中的(1，1)卷积层主要是将上一个卷积块输出x的通道维度提升至与当前模块通道数相同(两个tensor维度相同才可以做加法运算)。

![bottleneck design结构图](./model_image/bottleneck%20design.png)

在后面作者在cifar10数据集上做实验，分别搭建了Res20、Res32、Res44、Res56、Res110、Res1202，前5个模型的效果都是在提升的，在Res1202中模型出现了过拟合现象，但过拟合现象并不明显，与Res110相比平均错误率增加了1.5%。整体的话，模型过拟合现象是比较轻的。

在文末，作者将ResNet应用到目标检测中，整体效果也是比较好的。（最新版论文中已将这部分删除。）

残差网络在训练时，训练效果比较快的一个理解：假设上一个块输出为g（x），那么通过本层后的输出为F(g(x))+g(x),再经过一个relu层，在求梯度时，F（x）函数是嵌套的，结果是一个累乘的效果，这个值往往会比较小。函数相加求偏导后也是相加的，所以结果是F(x)的偏导加上g（x）的偏导，前面F(x)的结果较小没关系的，因为相对来说，后面g（x）部分的结果是较大的（相对F（x）的，因为多个块堆叠后还是嵌套的效果），这样整体就可以保持梯度是比较大的，模型调整比较快。

[ResNet论文链接**He, Kaiming, et al. "Deep residual learning for image recognition." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2016.**](https://arxiv.org/abs/1512.03385)

In [None]:
# Residual unit used by the Res18 / Res34 configurations.
class Shallow_ResBlock(Model):
    """Two-convolution residual unit.

    Main path: 3x3 conv (given stride) -> BN -> ReLU -> 3x3 conv -> BN.
    The result is added to the shortcut and passed through a ReLU.

    Args:
        filters: number of filters of both 3x3 convolutions (and of the
            shortcut projection, when enabled).
        strides: stride of the first convolution and of the shortcut projection.
        residual_dimension: when true, the shortcut is a 1x1 convolution
            followed by BN instead of the identity, so that its shape can
            match the main path.
    """

    def __init__(self, filters, strides=1, residual_dimension=False):
        super().__init__()
        self.residual_dimension = residual_dimension

        # Main path layers.
        self.conv1 = Conv2D(filters, (3, 3), strides=strides, padding='same')
        self.conv2 = Conv2D(filters, (3, 3), strides=1, padding='same')
        self.bn1, self.bn2 = BatchNormalization(), BatchNormalization()
        self.relu1, self.relu2 = Activation('relu'), Activation('relu')

        # Optional projection shortcut.
        if residual_dimension:
            self.residual_conv = Conv2D(filters, (1, 1), strides=strides)
            self.residual_bn = BatchNormalization()

    def call(self, inputs):
        """Return ReLU(F(inputs) + shortcut(inputs))."""
        main_path = self.relu1(self.bn1(self.conv1(inputs)))
        main_path = self.bn2(self.conv2(main_path))

        if self.residual_dimension:
            shortcut = self.residual_bn(self.residual_conv(inputs))
        else:
            shortcut = inputs

        return self.relu2(main_path + shortcut)


class Shallow_Res(Model):
    """ResNet built from ``Shallow_ResBlock`` units (Res18 / Res34 layouts).

    Args:
        blocks_list: number of residual units in each stage, e.g.
            ``[2, 2, 2, 2]``. The first stage uses 64 filters and every
            following stage doubles the filter count; the first unit of
            every stage except the first one uses stride 2 with a
            projection shortcut.
    """

    def __init__(self, blocks_list):
        super().__init__()
        self.blocks_num = len(blocks_list)
        self.blocks_list = blocks_list

        # Stem: 7x7/2 convolution -> BN -> ReLU -> 3x3/2 max-pool.
        self.conv = Conv2D(64, (7, 7), strides=2)
        self.bn = BatchNormalization()
        self.relu = Activation('relu')
        self.pool = MaxPool2D((3, 3), strides=2)

        # Residual stages, collected in a single Sequential container.
        self.blocks = Sequential()
        width = 64
        for stage_index, depth in enumerate(blocks_list):
            for unit_index in range(depth):
                downsample = stage_index != 0 and unit_index == 0
                self.blocks.add(Shallow_ResBlock(
                    width,
                    strides=2 if downsample else 1,
                    residual_dimension=downsample,
                ))
            width *= 2
        # Left at the doubled value reached after the last stage.
        self.filters = width

        self.avg_pool = GlobalAveragePooling2D()
        self.f = Dense(class_number, activation='softmax')

    def call(self, x):
        """Run the forward pass and return the per-class softmax output."""
        stem = self.pool(self.relu(self.bn(self.conv(x))))
        features_map = self.blocks(stem)
        return self.f(self.avg_pool(features_map))


# Bottleneck residual unit used by the Res50 / Res101 / Res152 configurations.
class Deep_ResBlock(Model):
    """Bottleneck residual unit.

    Main path: 1x1 conv (given stride) -> BN -> ReLU -> 3x3 conv -> BN -> ReLU
    -> 1x1 conv with ``4 * filters`` filters -> BN. The result is added to
    the shortcut and passed through a ReLU.

    Args:
        filters: filter count of the first two convolutions; the last
            convolution and the shortcut projection use ``4 * filters``.
        strides: stride of the first convolution and of the shortcut projection.
        residual_dimension: when true, the shortcut is a 1x1 convolution
            followed by BN instead of the identity.
    """

    def __init__(self, filters, strides=1, residual_dimension=False):
        super().__init__()
        self.filters = filters
        self.residual_dimension = residual_dimension

        expanded = 4 * filters
        self.conv1 = Conv2D(filters, (1, 1), strides=strides)
        self.conv2 = Conv2D(filters, (3, 3), strides=1, padding='same')
        self.conv3 = Conv2D(expanded, (1, 1), strides=1)
        self.bn1, self.bn2, self.bn3 = (
            BatchNormalization(), BatchNormalization(), BatchNormalization())
        self.relu1, self.relu2, self.relu3 = (
            Activation('relu'), Activation('relu'), Activation('relu'))

        # The first unit of each stage projects its input to the unit's output
        # width: 64 -> 256, 128 -> 512, 256 -> 1024, 512 -> 2048 (a factor of 4),
        # i.e. the shortcut must end up with the channel count of the third
        # convolution. Between consecutive stages the output channel count
        # doubles while the spatial size halves.
        if residual_dimension:
            self.residual_conv = Conv2D(expanded, (1, 1), strides=strides)
            self.residual_bn = BatchNormalization()

    def call(self, inputs):
        """Return ReLU(F(inputs) + shortcut(inputs))."""
        main_path = self.relu1(self.bn1(self.conv1(inputs)))
        main_path = self.relu2(self.bn2(self.conv2(main_path)))
        main_path = self.bn3(self.conv3(main_path))

        if self.residual_dimension:
            shortcut = self.residual_bn(self.residual_conv(inputs))
        else:
            shortcut = inputs

        return self.relu3(main_path + shortcut)


class Deep_Res(Model):
    """ResNet built from ``Deep_ResBlock`` bottleneck units (Res50 / 101 / 152 layouts).

    Args:
        blocks_list: number of bottleneck units in each stage, e.g.
            ``[3, 4, 6, 3]``. The first stage uses a base width of 64 and
            every following stage doubles it. The first unit of every
            stage uses a projection shortcut; in all stages but the first
            it also uses stride 2.
    """

    def __init__(self, blocks_list):
        super().__init__()
        self.blocks_num = len(blocks_list)
        self.blocks_list = blocks_list

        # Stem: 7x7/2 convolution -> BN -> ReLU -> 3x3/2 max-pool.
        self.conv1 = Conv2D(64, (7, 7), strides=2)
        self.bn = BatchNormalization()
        self.relu = Activation('relu')
        self.max_pool = MaxPool2D((3, 3), strides=2)

        # Residual stages, collected in a single Sequential container.
        self.blocks = Sequential()
        width = 64
        for stage_index in range(self.blocks_num):
            for unit_index in range(self.blocks_list[stage_index]):
                opens_stage = unit_index == 0
                self.blocks.add(Deep_ResBlock(
                    width,
                    strides=2 if (opens_stage and stage_index != 0) else 1,
                    residual_dimension=opens_stage,
                ))
            width *= 2
        # Left at the doubled value reached after the last stage.
        self.filters = width

        self.avg_pool = GlobalAveragePooling2D()
        self.f = Dense(class_number, activation='softmax')

    def call(self, x):
        """Run the forward pass and return the per-class softmax output."""
        stem = self.max_pool(self.relu(self.bn(self.conv1(x))))
        features_map = self.blocks(stem)
        return self.f(self.avg_pool(features_map))
        


## 加载数据

* 使用cifar10数据集训练LeNet-5
* 使用images_set训练AlexNet、VGG、ResNet

由于LeNet-5模型结构太简单，images_set数据集中每个类别最多只有1500张图片，所以使用images_set训练LeNet-5几乎不起作用，训练和测试acc都只有0.1(1000次迭代)。

In [None]:
# Disabled cell: CIFAR-10 loading, scaling and shuffling intended for LeNet-5.
# The code is wrapped in a bare string literal, so none of it executes.
'''
# for LeNet-5
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train/255.0, x_test/255.0
# 训练集乱序
np.random.seed(116)
np.random.shuffle(x_train)
np.random.seed(116)
np.random.shuffle(y_train)
tf.random.set_seed(116)
# 测试集乱序
np.random.seed(66)
np.random.shuffle(x_test)
np.random.seed(66)
np.random.shuffle(y_test)
'''


### 加载images_set
images_set为ImageNet2012数据集的一小部分，共有17类图片数据，每类图片有1000到1500张不等。对应为文件夹下每一个子文件夹为1类图片数据，子文件夹名字为类名，后面的数字为图片的数量。

* **推荐去[官网下载](https://image-net.org/request)，在提交下载请求时可能会遇到点小问题**

* **ImageNet数据获取可参看[Github地址](https://github.com/mf1024/ImageNet-Datasets-Downloader)**

* **ImageNet数据集[Kaggle链接](https://www.kaggle.com/datasets/lijiyu/imagenet)**

* **先加载images_set，可视化9张图片，显示数据集类别分布**

In [None]:
# Class names indexed by integer label; (re)populated by load_data().
features = []


def load_data(classes=17, train_size=1500, test_size=128,
              data_dir="../input/images/images_set/", image_size=(112, 112)):
    """Load the images_set folder into train/test arrays.

    Every sub-directory of ``data_dir`` is one class; its name is stored in
    the module-level ``features`` list at the index of its integer label.
    Labels follow the order returned by ``os.listdir``.

    Within each class the first ``test_size`` readable images go to the
    test split and the next ``train_size`` go to the training split; if a
    class has fewer images, all of them are used.

    Args:
        classes: number of classes to load; loading stops once this many
            class directories have been processed.
        train_size: maximum number of training images per class.
        test_size: maximum number of test images per class.
        data_dir: root directory holding one sub-directory per class.
        image_size: ``(width, height)`` passed to ``cv2.resize``. The
            original algorithms use 224x224; the default is smaller to
            reduce memory use.

    Returns:
        ``(x_train, y_train, x_test, y_test)`` as NumPy arrays; the label
        arrays hold integer class ids (0, 1, 2, ...).
    """
    x_train, y_train = [], []
    x_test, y_test = [], []
    # Reset in place so that calling load_data() again does not leave stale
    # or duplicated class names behind (other code holds a reference to
    # this same list object).
    features.clear()
    per_class_limit = train_size + test_size

    img_label = 0
    for class_name in os.listdir(data_dir):
        class_dir = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_dir):
            # Stray files next to the class folders are not classes.
            continue
        features.append(class_name)

        num = 0
        for img_name in os.listdir(class_dir):
            if num >= per_class_limit:
                # Quota reached: stop before decoding another image.
                break
            img = cv2.imread(os.path.join(class_dir, img_name))
            if img is None:
                # cv2.imread signals an unreadable / non-image file with None.
                continue
            img = cv2.resize(img, image_size, interpolation=cv2.INTER_AREA)
            if num < test_size:
                x_test.append(img)
                y_test.append(img_label)
            else:
                x_train.append(img)
                y_train.append(img_label)
            num += 1

        img_label += 1
        if img_label == classes:
            break

    x_train, y_train = np.array(x_train), np.array(y_train)
    x_test, y_test = np.array(x_test), np.array(y_test)
    print(x_train.shape)
    print(y_train.shape)
    print(x_test.shape)
    print(y_test.shape)

    return x_train, y_train, x_test, y_test

x_train, y_train, x_test, y_test = load_data(classes=class_number)

# for images_set
# Preview up to the first 9 training images in a 3x3 grid, titled with their class names.
fig = plt.figure(figsize=(25, 25))
for i in range(min(9, len(x_train))):
    plt.subplot(3, 3, i+1)
    # The images were read with cv2.imread, which yields BGR channel order;
    # reverse the last axis so matplotlib (RGB) shows the true colours.
    plt.imshow(x_train[i][..., ::-1])
    plt.title(features[int(y_train[i])])
# Lay the grid out once, after all subplots exist.
fig.tight_layout(pad=3.0)
plt.show()

# 检查数据集总体类分布
df = pd.DataFrame(data=np.hstack((y_train, y_test)))
#print(df)
counts = df.value_counts().sort_index()
print(counts)
def class_distribution(x, y, labels):
    """Show a bar chart with one bar per class.

    Args:
        x: Bar positions (or category names) passed to ``ax.bar``.
        y: Bar heights, one per entry of ``x``.
        labels: Tick labels, one per bar, drawn rotated by 90 degrees.
    """
    fig, ax = plt.subplots()
    bars = ax.bar(x, y)
    # set_xticklabels must only be used once the tick positions are fixed;
    # otherwise labels can land on the wrong ticks. Pin one tick to the
    # centre of every bar before labelling.
    ax.set_xticks([bar.get_x() + bar.get_width() / 2 for bar in bars])
    ax.set_xticklabels(labels, rotation=90)
    plt.show()
    
class_distribution(features, counts, features)

# Scale pixel values to [0, 1]. Converting to float32 first avoids the
# float64 array that `uint8 / 255.0` would create, which needs twice the
# memory; the division is done in place to keep the peak footprint low.
x_train = x_train.astype(np.float32)
x_train /= 255.0
x_test = x_test.astype(np.float32)
x_test /= 255.0

# Shuffle the training set. Re-seeding with the same value before each
# shuffle applies the same permutation to images and labels, so every image
# keeps its label.
np.random.seed(116)
np.random.shuffle(x_train)
np.random.seed(116)
np.random.shuffle(y_train)
tf.random.set_seed(116)

# Shuffle the test set the same way, with its own seed.
np.random.seed(66)
np.random.shuffle(x_test)
np.random.seed(66)
np.random.shuffle(y_test)


### 定义运行函数

In [None]:
'''
    运行函数
    参数：model_name:字符串，模型算法名，可选Lenet5、Alexnet8、GoogLeNet、VGG16、VGG19、Res18、Res34、Res50、Res101、Res152
    train_data：元组或列表，（训练集，训练集标签）形式
    validation_data：元组或列表，（测试集，测试集标签）形式
    batch_size：int，批次大小，默认为128
    epochs：int，迭代次数，默认为100
    
    函数运行首先会检测模型是否已存在，存在则直接加载模型权重继续训练；不存在则创建模型。训练完成后会输出模型结构
    可视化训练和测试的acc和loss曲线，并保存最优模型和模型的权重参数。
'''
def run(model_name, train_data, validation_data, batch_size=128, epochs=100):
    """Train (or resume training) one of the predefined networks.

    The model is compiled with the Adam optimizer and sparse categorical
    cross-entropy. If a checkpoint for ``model_name`` already exists, its
    weights are loaded before training continues. After training, the model
    summary is printed, the accuracy/loss curves are plotted, and all
    trainable variables are written to a text file.

    Args:
        model_name: One of "Lenet5", "Alexnet8", "GoogLeNet", "VGG16",
            "VGG19", "Res18", "Res34", "Res50", "Res101", "Res152".
        train_data: ``(samples, labels)`` tuple or list used for training.
        validation_data: ``(samples, labels)`` tuple or list used for
            validation after every epoch.
        batch_size: Mini-batch size.
        epochs: Number of training epochs.

    Raises:
        KeyError: If ``model_name`` is not one of the supported names.
    """
    # Map names to factories rather than instances so that only the requested
    # network is constructed, not all ten of them.
    model_builders = {
        "Lenet5": LeNet5,
        "Alexnet8": AlexNet8,
        "GoogLeNet": GoogLeNet,
        "VGG16": VGG16,
        "VGG19": VGG19,
        "Res18": lambda: Shallow_Res([2, 2, 2, 2]),
        "Res34": lambda: Shallow_Res([3, 4, 6, 3]),
        "Res50": lambda: Deep_Res([3, 4, 6, 3]),
        "Res101": lambda: Deep_Res([3, 4, 23, 3]),
        "Res152": lambda: Deep_Res([3, 8, 36, 3]),
    }
    if model_name not in model_builders:
        raise KeyError(
            "Unknown model_name {!r}; expected one of: {}".format(
                model_name, ", ".join(model_builders)
            )
        )
    model = model_builders[model_name]()

    checkpoint_save_path = "./checkpoint/{}.ckpt".format(model_name)
    weights_save_path = './weights/{}-weights.txt'.format(model_name)
    # Create the output folders up front so a missing directory cannot make
    # the run fail after training has already finished.
    os.makedirs(os.path.dirname(checkpoint_save_path), exist_ok=True)
    os.makedirs(os.path.dirname(weights_save_path), exist_ok=True)

    model.compile(
        optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=['sparse_categorical_accuracy'],
    )
    # The '.index' file is what marks an existing checkpoint for this model.
    if os.path.exists(checkpoint_save_path + '.index'):
        print('---'*20, ' load the model... ', '---'*20)
        model.load_weights(checkpoint_save_path)

    # The docstring allows a list; hand Keras a tuple in that case.
    if isinstance(validation_data, list):
        validation_data = tuple(validation_data)

    op_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_save_path,
        save_weights_only=True,
        save_best_only=True,
    )
    history = model.fit(
        train_data[0], train_data[1],
        batch_size=batch_size, epochs=epochs,
        validation_data=validation_data, validation_freq=1,
        callbacks=[op_callback],
    )
    model.summary()

    # Plot accuracy (left) and loss (right) for training and validation.
    acc = history.history['sparse_categorical_accuracy']
    val_acc = history.history['val_sparse_categorical_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    plt.subplot(1, 2, 1)
    plt.plot(acc, label='Training Accuracy')
    plt.plot(val_acc, label='Validation Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.legend()
    plt.subplot(1, 2, 2)
    plt.plot(loss, label='Training Loss')
    plt.plot(val_loss, label='Validation Loss')
    plt.title("Training and Validation Loss")
    plt.legend()
    plt.show()

    # Dump name, shape and values of every trainable variable as text.
    # NOTE(review): str() of a large array is abbreviated with "..." unless
    # numpy's print threshold was raised elsewhere - confirm if the full
    # values are needed in this file.
    with open(weights_save_path, 'w', encoding='utf-8') as file:
        for v in model.trainable_variables:
            file.write(str(v.name) + '\n')
            file.write(str(v.shape) + '\n')
            file.write(str(v.numpy()) + '\n')

### **运行模型**
* **建议在运行前先在当前文件夹中创建checkpoint文件夹和weights文件夹**
* **Lenet5模型加载cifar10数据集**
* **AlexNet、GoogLeNet、VGG、ResNet模型加载images_set数据集**

In [None]:
# Train one network per execution: enable exactly one of the calls below.
# run('Lenet5', (x_train, y_train), (x_test, y_test), batch_size=1024, epochs=500)
#run('Alexnet8', (x_train, y_train), (x_test, y_test), batch_size=256, epochs=300)
run(
    'GoogLeNet',
    (x_train, y_train),
    (x_test, y_test),
    batch_size=128,
    epochs=200,
)
#run('VGG16', (x_train, y_train), (x_test, y_test), batch_size=128, epochs=200)
#run('VGG19', (x_train, y_train), (x_test, y_test), batch_size=128, epochs=100)
#run('Res18', (x_train, y_train), (x_test, y_test), batch_size=128, epochs=300)
#run('Res34', (x_train, y_train), (x_test, y_test), batch_size=128, epochs=200)
#run('Res50', (x_train, y_train), (x_test, y_test), batch_size=128, epochs=200)
#run('Res101', (x_train, y_train), (x_test, y_test), batch_size=128, epochs=100)
#run('Res152', (x_train, y_train), (x_test, y_test), batch_size=128, epochs=100)


## **以上程序均是在Kaggle平台上运行的，由于Kaggle平台在下载notebook时，不会把运行过程中的结果保留下来（也可能是我个人操作有问题），所以我将所有结果保留在同级目录文件夹results_output (All_run_in_Kaggle)中，供查看**