In [None]:
import numpy as np  # Import NumPy library
import pandas as pd  # Import Pandas library
# Solution 1
# from keras.datasets import mnist  # Import MNIST dataset from Keras

# # Load the training and testing datasets
# (X_train_image, y_train_label), (X_test_image, y_test_label) = mnist.load_data()

# print ("数据集张量形状：", X_train_image.shape) #用shape方法显示张量的形状
# print ("第一个数据样本：\n", X_train_image[0]) #注意Python的索引是从0开始的
# print ("第一个数据样本的标签：", y_train_label[0])

# Solution 2, Load local dataset
import os
from pathlib import Path

# Single source of truth for the dataset location (relative to the notebook's
# working directory), so the directory and the archive path cannot drift apart.
DATA_DIR = Path('../../../../dirRawDataSet')
MNIST_PATH = DATA_DIR / 'mnist.npz'

# Show what is in the data directory. A bare os.listdir(...) in the middle of a
# cell discards its result, so print it explicitly. Still raises
# FileNotFoundError when the directory does not exist.
print("Files in data directory:", sorted(os.listdir(DATA_DIR)))

# Read the four arrays out of the .npz archive; the with-block closes the
# archive as soon as they have been extracted.
with np.load(MNIST_PATH) as data:
    X_train_image = data['x_train']
    y_train_label = data['y_train']
    X_test_image = data['x_test']
    y_test_label = data['y_test']

# Every image must have exactly one label; fail here rather than deep inside training.
assert len(X_train_image) == len(y_train_label), "train images/labels length mismatch"
assert len(X_test_image) == len(y_test_label), "test images/labels length mismatch"

# Check the shapes of the loaded arrays
print("X_train_image shape:", X_train_image.shape)
print("y_train_label shape:", y_train_label.shape)
print("X_test_image shape:", X_test_image.shape)
print("y_test_label shape:", y_test_label.shape)

In [None]:
# Solution 1
# from keras.utils import to_categorical # 导入keras.utils工具库的类别转换工具
# X_train = X_train_image.reshape(60000, 28, 28, 1) # 给标签增加一个维度
# X_test = X_test_image.reshape(10000, 28, 28, 1) # 给标签增加一个维度
# y_train = to_categorical(y_train_label, 10) # 特征转换为one-hot编码
# y_test = to_categorical(y_test_label, 10) # 特征转换为one-hot编码
# print ("训练集张量形状：", X_train.shape) # 训练集张量的形状
# print ("第一个数据标签：", y_train[0]) # 显示标签集的第一个数据

# Solution 2
from keras.utils import to_categorical  # one-hot label encoder

# The Conv2D layers used later take 4-D input, so append a single channel axis
# to each image. The leading -1 lets NumPy infer the number of samples.
image_tensor_shape = (-1, 28, 28, 1)
X_train = np.reshape(X_train_image, image_tensor_shape)
X_test = np.reshape(X_test_image, image_tensor_shape)

# Turn each integer label into a 10-element one-hot vector, as required by the
# categorical cross-entropy loss.
y_train = to_categorical(y_train_label, 10)
y_test = to_categorical(y_test_label, 10)

# Quick confirmation of the result
print("Training set shape:", X_train.shape)
print("First training label (one-hot):", y_train[0])


In [None]:
# Solution 1
# from keras import models # 导入Keras模型, 以及各种神经网络的层
# from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
# model = models.Sequential() # 用序贯方式建立模型
# model.add(Conv2D(32, (3, 3), activation='relu', # 添加Conv2D层
#                 input_shape=(28, 28, 1)))  # 指定输入数据样本张量的类型
# model.add(MaxPooling2D(pool_size=(2, 2))) # 添加MaxPooling2D层
# model.add(Conv2D(64, (3, 3), activation='relu')) # 添加Conv2D层
# model.add(MaxPooling2D(pool_size=(2, 2))) # 添加MaxPooling2D层
# model.add(Dropout(0.25)) # 添加Dropout层
# model.add(Flatten()) # 展平
# model.add(Dense(128, activation='relu')) # 添加全连接层
# model.add(Dropout(0.5)) # 添加Dropout层
# model.add(Dense(10, activation='softmax')) # Softmax分类激活, 输出10维分类码
# # 编译模型
# model.compile(optimizer='rmsprop', # 指定优化器
# loss='categorical_crossentropy', # 指定损失函数
# metrics=['accuracy']) # 指定验证过程中的评估指标

# Solution 2
from keras import models  # Provides the Sequential container
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Input

# Declare the whole layer stack up front; Sequential chains the layers in list order.
model = models.Sequential([
    Input(shape=(28, 28, 1)),               # 28x28 single-channel images
    Conv2D(32, (3, 3), activation='relu'),  # first convolution: 32 filters, 3x3 kernel
    MaxPooling2D(pool_size=(2, 2)),         # 2x2 max pooling
    Conv2D(64, (3, 3), activation='relu'),  # second convolution: 64 filters, 3x3 kernel
    MaxPooling2D(pool_size=(2, 2)),         # 2x2 max pooling
    Dropout(0.25),                          # drop 25% of activations during training
    Flatten(),                              # feature maps -> 1-D vector for the dense head
    Dense(128, activation='relu'),          # fully connected hidden layer
    Dropout(0.5),                           # drop 50% of activations during training
    Dense(10, activation='softmax'),        # one probability per class (10 classes)
])

# Configure training: RMSprop optimizer, categorical cross-entropy loss
# (matches the one-hot labels), and accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

This code builds a Convolutional Neural Network (CNN) using Keras, iteratively creating multiple models and configuring their layers. Let's break it down:
General Structure:

    The code defines a list model that will contain multiple CNN models (15 models in total, based on the nets = 15 line).
    For each of the 15 models, the code builds a CNN architecture with multiple convolutional layers, batch normalization, dropout layers, and a single fully connected (dense) output layer.
    It also compiles the model using the Adam optimizer and categorical cross-entropy loss function, which is typically used for multi-class classification tasks.

Code Explanation:

    Model Initialization:

nets = 15
model = [0] * nets

    The nets = 15 specifies the number of CNN models to create.
    The list model is initialized with 15 elements, each set to 0. These will hold the individual models.

Iterative Model Construction:

for j in range(nets):
    model[j] = Sequential()

    A loop is used to create each model (Sequential()).
    Each model is initialized as a Keras Sequential model, which means the layers will be stacked in order.

Convolutional Layers with Batch Normalization:

model[j].add(Conv2D(32, kernel_size = 3, activation='relu', input_shape = (28, 28, 1)))
model[j].add(BatchNormalization())
model[j].add(Conv2D(32, kernel_size = 3, activation='relu'))
model[j].add(BatchNormalization())
model[j].add(Conv2D(32, kernel_size = 5, strides=2, padding='same', activation='relu'))
model[j].add(BatchNormalization())

    Conv2D(32, kernel_size=3, activation='relu'): Adds a convolutional layer with 32 filters (kernels), a kernel size of 3x3, and ReLU activation.
    BatchNormalization(): Normalizes the activations of the previous layer over each mini-batch, which stabilizes and speeds up training and also has a mild regularizing effect.
    The third Conv2D layer uses a 5x5 kernel with strides=2, which halves the spatial dimensions (here 24x24 -> 12x12) and takes the place of a pooling layer.
    padding='same' pads the input so that the output size is ceil(input size / stride); the output matches the input size only when strides=1.
    These layers extract features from the image data (28x28x1, grayscale images).

Dropout Layer:

model[j].add(Dropout(0.4))

    This layer randomly sets 40% of the inputs to zero during training to prevent overfitting.

Additional Convolutional and Pooling Layers:

model[j].add(Conv2D(64, kernel_size = 3, activation='relu'))
model[j].add(BatchNormalization())
model[j].add(Conv2D(64, kernel_size = 3, activation='relu'))
model[j].add(BatchNormalization())
model[j].add(Conv2D(64, kernel_size = 5, strides=2, padding='same', activation='relu'))
model[j].add(BatchNormalization())
model[j].add(Dropout(0.4))

    After the first set of convolutional layers, the code adds more Conv2D layers with 64 filters.
    Again, each convolutional layer is followed by batch normalization, and a single Dropout(0.4) layer closes the block.
    These layers are used to learn more complex features and further reduce overfitting.

Final Convolutional Layer and Flatten:

model[j].add(Conv2D(128, kernel_size = 4, activation='relu'))
model[j].add(BatchNormalization())
model[j].add(Flatten())
model[j].add(Dropout(0.4))

    The final Conv2D layer has 128 filters and uses a kernel size of 4x4.
    The output is flattened to a 1D array, as a fully connected (dense) layer will be applied next.
    Another dropout layer is added.

Fully Connected Output Layer:

model[j].add(Dense(10, activation='softmax'))

    The Dense layer with 10 neurons is the output layer, which corresponds to the number of classes (10 classes for MNIST digits).
    The Softmax activation function is used, which will output a probability distribution across the 10 classes.

Model Compilation:

    model[j].compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

        The model is compiled using the Adam optimizer, which adapts the step size of each parameter using running estimates of the first and second moments of its gradients.
        The categorical cross-entropy loss function is used because this is a multi-class classification task (MNIST).
        The model will be evaluated using accuracy as the metric.

Summary:

This code builds 15 CNN models that share the same architecture (they differ only in their random weight initialization), with the following structure:

    Multiple convolutional layers with Batch Normalization and Dropout for regularization.
    Strided convolutions (strides=2) used in place of pooling layers to reduce the spatial dimensions.
    A final Dense layer with 10 units for classification into 10 categories (digits from 0 to 9).
    The models are compiled using the Adam optimizer and categorical cross-entropy loss function.

The code is designed to create and compile 15 independently initialized copies of the same architecture, which could then be trained on the MNIST dataset or another similar dataset. The idea behind having multiple models is most likely ensemble learning, where the predictions of the individual models are combined (for example, by averaging their softmax outputs) to improve performance.

In [None]:
# Solution 1
# model.fit(X_train, y_train, # 指定训练特征集和训练标签集
#             validation_split = 0.3, # 部分训练集数据拆分成验证集
#             epochs=5, # 训练轮次为5轮
#             batch_size=128) # 以128为批量进行训练

# Solution 2
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau  # Training callbacks

# Stop once val_loss has not improved for 3 epochs and restore the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Write the model to 'best_model.keras' only when val_accuracy reaches a new best.
best_checkpoint = ModelCheckpoint('best_model.keras', monitor='val_accuracy', save_best_only=True)

# Multiply the learning rate by 0.2 after 2 epochs without val_loss improvement,
# never going below 1e-5.
lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-5)

callbacks = [early_stopping, best_checkpoint, lr_on_plateau]

# Fit on the training tensors. 30% of the training data is held out for validation;
# training runs for at most 5 epochs (early stopping may end it sooner) in batches
# of 128 samples, with per-epoch progress printed (verbose=1).
history = model.fit(
    X_train,
    y_train,
    validation_split=0.3,
    epochs=5,
    batch_size=128,
    callbacks=callbacks,
    verbose=1,
)

In [None]:
# Solution 1
# score = model.evaluate(X_test, y_test) # 在验证集上进行模型评估
# print('测试集预测准确率:', score[1]) # 输出测试集上的

# Solution 2
# Score the trained network on the held-out test set; verbose=0 suppresses the progress bar.
# The result unpacks into the loss and the accuracy metric configured at compile time.
evaluation = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_accuracy = evaluation
print(f"测试集预测准确率: {test_accuracy:.4f}")  # Test-set accuracy, 4 decimal places


In [None]:
# Solution 1
# pred = model.predict(X_test[0].reshape(1, 28, 28, 1)) # 预测测试集第一个数据
# print(pred[0], "转换一下格式得到：", pred.argmax()) # 把one-hot编码转换为数字
# import matplotlib.pyplot as plt # 导入绘图工具包
# plt.imshow(X_test[0].reshape(28, 28), cmap='Greys') # 输出这个图片

# Solution 2
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm

# Predict the class of the first test image. Slicing with [:1] keeps the batch
# axis, so the model receives a batch containing exactly one sample.
first_prediction = model.predict(X_test[:1])
predicted_label = first_prediction[0].argmax()  # index of the highest class score

# Display prediction result and image
print(f"预测值 (one-hot 编码): {first_prediction[0]}, 转换为数字格式: {predicted_label}")

# Apply the font only while this figure is built and shown. rc_context restores
# the previous rcParams on exit (even if plotting raises), whereas a trailing
# plt.rcdefaults() would reset *every* matplotlib setting to its built-in default.
# To render CJK text in the figure, swap in a CJK-capable font such as
# 'Noto Sans CJK JP'.
with plt.rc_context({'font.family': 'DejaVu Sans'}):
    # Drop the trailing channel axis so imshow gets a plain 2-D grayscale image.
    plt.imshow(X_test[0, :, :, 0], cmap='Greys')
    plt.title(f"Model prediction: {predicted_label}")
    plt.axis('off')
    plt.show()
