# 使用卷积神经网络进行cifar10数据集的分类任务

先搭建并训练一个带有残差连接（ResNet 风格）的 CNN 模型，之后再利用该模型进行迁移学习。

## 一、加载数据集

In [1]:
import numpy as np
# 加载数据的代码
import tensorflow as tf
# Load the CIFAR-10 train/test arrays through the Keras dataset helper.
cifar10 = tf.keras.datasets.cifar10
(X_train_full, y_train_full), (X_test, y_test) = cifar10.load_data()

# Print the array shapes as a sanity check before preprocessing.
print("训练集图片形状:", X_train_full.shape)  # expected: (50000, 32, 32, 3)
print("训练集标签形状:", y_train_full.shape)  # expected: (50000, 1)

# Scale the pixel values of both splits by 1/255.
X_train_full = X_train_full / 255.
X_test = X_test / 255.

训练集图片形状: (50000, 32, 32, 3)
训练集标签形状: (50000, 1)


In [2]:
from sklearn.preprocessing import LabelEncoder


# Class names listed in label order 0-9:
# airplane, automobile, bird, cat, deer, dog, frog, horse, ship, truck.
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# The data is split into a 2-class problem and an 8-class problem.
classes = ['horse', 'ship']     # label 7 is horse, label 8 is ship

# --- Training split ---
# Flat boolean mask: True where the label is 7 or 8.
mask_arr = ((y_train_full == 7) | (y_train_full == 8)).ravel()
X_train_binary_classes, y_train_binary_classes = (
    X_train_full[mask_arr],
    y_train_full[mask_arr],
)
X_train, y_train = X_train_full[~mask_arr], y_train_full[~mask_arr]

# --- Test split (same rule as above) ---
mask_arr1 = ((y_test == 7) | (y_test == 8)).ravel()
X_test_binary_classes, y_test_binary_classes = (
    X_test[mask_arr1],
    y_test[mask_arr1],
)
X_test, y_test = X_test[~mask_arr1], y_test[~mask_arr1]

# --- Label re-encoding ---
# 2-class task: label 8 becomes 1, the other selected label (7) becomes 0.
y_train_binary_classes = (y_train_binary_classes == 8).astype(np.int8)
y_test_binary_classes = (y_test_binary_classes == 8).astype(np.int8)

# 8-class task: fit the encoder on the training labels only, then apply the
# same mapping to both splits.
encoder = LabelEncoder()
encoder.fit(y_train.ravel())
y_train = encoder.transform(y_train.ravel())
y_test = encoder.transform(y_test.ravel())


## 二、构造卷积神经网络

In [3]:
from functools import partial

# Conv2D factory with the defaults shared by every convolution in this notebook:
# 3x3 kernel, stride 1, "same" padding, He-normal initialisation and no bias.
# Any of these can still be overridden per call.
DefaultConv2D = partial(
    tf.keras.layers.Conv2D,
    kernel_size=3,
    strides=1,
    padding="same",
    kernel_initializer="he_normal",
    use_bias=False,
)


# Residual block definition
@tf.keras.utils.register_keras_serializable(name="ResidualUnit")
class ResidualUnit(tf.keras.layers.Layer):
    """Residual unit: two 3x3 conv + batch-norm stages plus a skip connection.

    The main path is conv -> BN -> activation -> conv -> BN. The skip path is
    the identity when ``strides == 1``; when ``strides > 1`` it is a 1x1
    convolution with the same stride followed by batch normalisation. The two
    paths are added and the activation is applied to the sum.

    Because the skip path is the identity for ``strides == 1``, the input must
    already have ``filters`` channels in that case for the addition to work.

    Args:
        filters: Number of output channels of every convolution in the unit.
        strides: Stride of the first main-path convolution (and of the 1x1
            skip convolution when greater than 1).
        activation: Activation identifier accepted by
            ``tf.keras.activations.get`` (for example ``"relu"``).
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, filters, strides=1, activation="relu", **kwargs):
        super().__init__(**kwargs)
        self.filters = filters
        self.strides = strides
        # Resolve the identifier to a callable once; get_config() converts it
        # back to a serializable identifier.
        self.activation = tf.keras.activations.get(activation)

        self.main_layers = [
            DefaultConv2D(filters, strides=strides),
            tf.keras.layers.BatchNormalization(),
            self.activation,
            DefaultConv2D(filters),
            tf.keras.layers.BatchNormalization(),
        ]

        # The skip path only needs its own layers when the main path
        # downsamples; otherwise the input is added unchanged.
        self.skip_layers = []
        if strides > 1:
            self.skip_layers = [
                DefaultConv2D(filters, kernel_size=1, strides=strides),
                tf.keras.layers.BatchNormalization(),
            ]

    def call(self, inputs, *args, **kwargs):
        """Apply the main and skip paths and return ``activation(main + skip)``."""
        Z = inputs
        for layer in self.main_layers:
            Z = layer(Z)

        skip_Z = inputs
        for layer in self.skip_layers:
            skip_Z = layer(skip_Z)

        return self.activation(Z + skip_Z)

    def get_config(self):
        """Return the layer config including the custom constructor arguments."""
        config = super().get_config()
        config.update({
            "filters": self.filters,
            "strides": self.strides,
            # Store a serializable identifier rather than the callable itself
            # so that the config can be fed back to __init__.
            "activation": tf.keras.activations.serialize(self.activation),
        })
        return config

In [4]:
# Stem: 7x7 stride-2 convolution, batch norm, ReLU, then a 3x3 stride-2 max pool.
# The input shape is declared with an explicit Input object instead of an
# `input_shape=` argument on the first layer, which Keras warns about for
# Sequential models.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=[32, 32, 3]),
    DefaultConv2D(64, kernel_size=7, strides=2),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation("relu"),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding="same"),
])

# Stack of residual units: 3 x 64, 4 x 128, 6 x 256 and 3 x 512 filters.
# `n_filters` is used instead of `filter` so the builtin is not shadowed.
prev_filters = 64
for n_filters in [64] * 3 + [128] * 4 + [256] * 6 + [512] * 3:
    # Use stride 2 on the first unit of each new width, stride 1 otherwise.
    strides = 1 if n_filters == prev_filters else 2
    model.add(ResidualUnit(n_filters, strides=strides))
    prev_filters = n_filters

# Classification head for the 8-class task.
model.add(tf.keras.layers.GlobalAvgPool2D())
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(8, activation="softmax"))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [5]:
# The labels are integer class ids and the network ends in an 8-way softmax,
# so the sparse categorical cross-entropy loss is used.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train for 10 epochs; the 8-class test split is passed as validation data.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
)

Epoch 1/10
1250/1250 ━━━━━━━━━━━━━━━━━━━━ 58s 25ms/step - accuracy: 0.4028 - loss: 1.7362 - val_accuracy: 0.3801 - val_loss: 1.7136
Epoch 2/10
1250/1250 ━━━━━━━━━━━━━━━━━━━━ 28s 22ms/step - accuracy: 0.5827 - loss: 1.1735 - val_accuracy: 0.4363 - val_loss: 1.8964
Epoch 3/10
1250/1250 ━━━━━━━━━━━━━━━━━━━━ 27s 22ms/step - accuracy: 0.6218 - loss: 1.0680 - val_accuracy: 0.5727 - val_loss: 1.3853
Epoch 4/10
1250/1250 ━━━━━━━━━━━━━━━━━━━━ 28s 22ms/step - accuracy: 0.6911 - loss: 0.8592 - val_accuracy: 0.4593 - val_loss: 5.5761
Epoch 5/10
1250/1250 ━━━━━━━━━━━━━━━━━━━━ 28s 22ms/step - accuracy: 0.7046 - loss: 0.8323 - val_accuracy: 0.5821 - val_loss: 1.2426
Epoch 6/10
1250/1250 ━━━━━━━━━━━━━━━━━━━━ 27s 22ms/step - accuracy: 0.7304 - loss: 0.7565 - val_accuracy: 0.6722 - val_loss: 0.9339
Epoc

<keras.src.callbacks.history.History at 0x7d77de259640>

In [6]:
# Create the output directory first: saving to a path whose parent directory
# does not exist would otherwise fail.
import os

os.makedirs("./tmps", exist_ok=True)
model.save("./tmps/pretrain_model.keras")

## 三、迁移学习（重用预训练层）

In [7]:
# Reload the pretrained 8-class network from disk. The custom ResidualUnit
# class is passed explicitly so the loader can rebuild those layers.
new_model = tf.keras.models.load_model(
    "./tmps/pretrain_model.keras",
    custom_objects={"ResidualUnit": ResidualUnit},
)




In [8]:
# Reuse every pretrained layer except the final 8-way softmax layer, then
# attach a single sigmoid unit for the binary task. The reused layer objects
# are the same ones held by `new_model`, not copies.
model2 = tf.keras.Sequential(new_model.layers[:-1])
model2.add(tf.keras.layers.Dense(1, activation="sigmoid"))

# Freeze the reused layers so that only the new output layer is trained.
for pretrained_layer in model2.layers[:-1]:
    pretrained_layer.trainable = False

model2.compile(optimizer="sgd", loss="binary_crossentropy", metrics=["accuracy"])

# Train the new head for 5 epochs on the 2-class subset.
model2.fit(
    X_train_binary_classes,
    y_train_binary_classes,
    epochs=5,
)


Epoch 1/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 10s 15ms/step - accuracy: 0.7556 - loss: 0.6687
Epoch 2/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.8370 - loss: 0.4915
Epoch 3/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.8375 - loss: 0.4955
Epoch 4/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.8398 - loss: 0.5195
Epoch 5/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.8391 - loss: 0.5581


<keras.src.callbacks.history.History at 0x7d77262edd60>

In [9]:
# Unfreeze the pretrained layers so the whole network can be fine-tuned.
for pretrained_layer in model2.layers[:-1]:
    pretrained_layer.trainable = True

# Compile again after changing the `trainable` flags.
model2.compile(optimizer="sgd", loss="binary_crossentropy", metrics=["accuracy"])

# Fine-tune for 10 epochs, validating on the 2-class test subset.
model2.fit(
    X_train_binary_classes,
    y_train_binary_classes,
    validation_data=(X_test_binary_classes, y_test_binary_classes),
    epochs=10,
)

Epoch 1/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 30s 53ms/step - accuracy: 0.8645 - loss: 0.3491 - val_accuracy: 0.9180 - val_loss: 0.2461
Epoch 2/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 22s 16ms/step - accuracy: 0.9320 - loss: 0.1797 - val_accuracy: 0.9475 - val_loss: 0.1463
Epoch 3/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 5s 15ms/step - accuracy: 0.9412 - loss: 0.1546 - val_accuracy: 0.9505 - val_loss: 0.1337
Epoch 4/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 5s 16ms/step - accuracy: 0.9498 - loss: 0.1313 - val_accuracy: 0.9580 - val_loss: 0.1109
Epoch 5/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 5s 15ms/step - accuracy: 0.9552 - loss: 0.1163 - val_accuracy: 0.9625 - val_loss: 0.1040
Epoch 6/10
313/313 ━━━━━━━━━━━━━━━━━━━━ 5s 15ms/step - accuracy: 0.9581 - loss: 0.1064 - val_accuracy: 0.9610 - val_loss: 0.1003
[1m313/3

<keras.src.callbacks.history.History at 0x7d772627aba0>

可以看到，模型表现很不错：解冻预训练层并微调之后，训练集和验证集的准确率均达到了约 97%。