### MLP多层感知机

In [4]:
from utils.dataset_utils import get_classes_indexes_counts
from sklearn.metrics import confusion_matrix
# 导入必要的库
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
import scipy.io as sio  # 从.mat文件中读取数据集

# Load the Satellite dataset from a MATLAB .mat file (this cell does not use
# the built-in iris data): features are read from 'X', labels from 'Y'.
mat_data = sio.loadmat('../data/dataset/Satellite.mat')
X, y = mat_data['X'], mat_data['Y'][:, 0]  # only the first column of 'Y' is used as the label vector

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=100
)

# Print the counts returned by the project helper for the test labels.
# NOTE(review): presumably per-class sample counts -- the helper lives in
# utils.dataset_utils and is not visible here; confirm against its definition.
classes, counts = get_classes_indexes_counts(y_test)
print(counts)

# Standardise the features. The scaler is fitted on the training split only and
# then applied to both splits, so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Build and train an MLP with two hidden layers (10 and 20 units).
mlp = MLPClassifier(hidden_layer_sizes=(10, 20), max_iter=1000, random_state=42).fit(X_train, y_train)

# Hard label predictions and per-class probabilities for the test split.
y_pred = mlp.predict(X_test)
index_pred_proba = mlp.predict_proba(X_test)

# Report accuracy and the per-class precision/recall/F1 table.
print("准确率:", accuracy_score(y_test, y_pred))
print("\n分类报告:\n", classification_report(y_test, y_pred))

# Confusion matrix followed by a few summaries derived from it, one per line.
print("Confusion Matrix:")
cm = confusion_matrix(y_test, y_pred)
print(
    cm,                      # full matrix (rows: true labels, columns: predictions)
    cm.sum(axis=1),          # row sums = number of test samples per true class
    cm.diagonal(),           # correctly classified samples per class
    cm.diagonal().sum(),     # total number of correct predictions
    index_pred_proba.shape,  # (n_test_samples, n_classes)
    sep="\n",
)

# Keep the probability matrix as the cell's last expression so the notebook displays it.
index_pred_proba

[423 227 423 187 228 443]
准确率: 0.8772656654583117

分类报告:
               precision    recall  f1-score   support

           0       0.96      0.98      0.97       423
           1       0.94      0.99      0.97       227
           2       0.90      0.89      0.89       423
           3       0.60      0.63      0.61       187
           4       0.88      0.89      0.88       228
           5       0.86      0.81      0.83       443

    accuracy                           0.88      1931
   macro avg       0.86      0.86      0.86      1931
weighted avg       0.88      0.88      0.88      1931

Confusion Matrix:
[[415   1   5   0   2   0]
 [  0 225   0   0   2   0]
 [  6   1 377  28   1  10]
 [  3   5  24 118   4  33]
 [  9   4   0   0 202  13]
 [  0   3  14  51  18 357]]
[423 227 423 187 228 443]
[415 225 377 118 202 357]
1694
(1931, 6)


array([[1.26358870e-03, 2.95327621e-03, 2.75867514e-04, 1.22432888e-02,
        9.49464611e-01, 3.37993681e-02],
       [4.48003909e-06, 2.25903496e-05, 1.30420419e-01, 6.49700543e-01,
        7.20267032e-04, 2.19131700e-01],
       [6.12439502e-04, 1.23576047e-02, 2.86677640e-02, 4.38111663e-01,
        9.08693774e-02, 4.29381152e-01],
       ...,
       [9.96151899e-01, 3.09718251e-08, 5.82946607e-09, 1.47218277e-09,
        3.84806145e-03, 1.19381333e-09],
       [3.58876803e-07, 4.38909516e-04, 2.38953825e-01, 3.67178563e-01,
        1.67255092e-03, 3.91755793e-01],
       [8.64519795e-07, 7.47939578e-04, 2.71858011e-01, 5.59632953e-01,
        5.32107733e-04, 1.67228125e-01]])

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
import numpy as np

# Load the iris dataset: feature matrix, integer targets and the class names.
iris = load_iris()
X, y = iris.data, iris.target
class_names = iris.target_names

# Stratified 70/30 train/test split so every class keeps its proportion.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Standardise the features using statistics fitted on the training split only.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Define and train an MLP with two hidden layers of 10 units each,
# then predict labels for the test split.
mlp = MLPClassifier(hidden_layer_sizes=(10, 10), max_iter=1000, random_state=42).fit(X_train, y_train)
y_pred = mlp.predict(X_test)

# Confusion matrix; its diagonal holds the correctly classified count per class.
cm = confusion_matrix(y_test, y_pred)
correct_per_class = cm.diagonal()

# Per-class summary: test-set size, correct predictions, wrong predictions.
print("类别信息:")
for i, class_name in enumerate(class_names):
    total_count = (y_test == i).sum()
    correct_count = correct_per_class[i]
    incorrect_count = total_count - correct_count
    print(f"类别 {class_name}:")
    print(f" - 测试集数量: {total_count}")
    print(f" - 预测正确数量: {correct_count}")
    print(f" - 预测错误数量: {incorrect_count}")

### k-fold 交叉验证

In [None]:
from sklearn.datasets import load_iris
from sklearn.neural_network import MLPClassifier
# train_test_split is used by the hold-out comparison below; import it here so
# this cell does not depend on another cell having been executed first.
from sklearn.model_selection import cross_val_predict, StratifiedKFold, train_test_split
from sklearn.metrics import classification_report

# 1. Load the iris dataset.
data = load_iris()
X, y = data.data, data.target

# 2. Initialise the MLP evaluated with cross-validation (one hidden layer, 40 units).
mlp_1 = MLPClassifier(hidden_layer_sizes=(40,), max_iter=200, random_state=42)

# 3. Configure shuffled, stratified 5-fold cross-validation.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 4. Obtain an out-of-fold prediction for every sample via cross_val_predict.
y_pred = cross_val_predict(mlp_1, X, y, cv=cv)

# 5. Classification report over the out-of-fold predictions (covers all samples).
print("Classification Report:\n")
print(classification_report(y, y_pred, target_names=data.target_names))

# For comparison: an identically configured MLP evaluated on a single 70/30 hold-out split.
mlp_2 = MLPClassifier(hidden_layer_sizes=(40,), max_iter=200, random_state=42)
# Split the dataset into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
mlp_2.fit(X_train, y_train)

# Predict on the held-out test split (y_pred is rebound to the hold-out predictions).
y_pred = mlp_2.predict(X_test)

# Classification report for the hold-out evaluation.
print(classification_report(y_test, y_pred, target_names=data.target_names))

### Nos下训练MLP

In [1]:
from scipy.stats import gmean
from utils.dataset_utils import get_classes_indexes_counts
from sklearn.metrics import confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
import scipy.io as sio  # 从.mat文件中读取数据集

# Load the Chess4 dataset from a MATLAB .mat file: features from 'X', labels from 'Y'.
mat_data = sio.loadmat('../data/dataset/Chess4.mat')
X = mat_data['X']  # features
y = mat_data['Y'][:, 0]  # labels (first column of 'Y')
# Counts returned by the project helper for the full label vector.
# NOTE(review): presumably per-class sample counts -- helper is defined in
# utils.dataset_utils and is not visible here; confirm against its definition.
classes, counts = get_classes_indexes_counts(y)
# Split the dataset into training (70%) and test (30%) sets with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=100)

print(counts)
# Build and train the MLP with a single hidden layer of 15 units.
# `(15,)` is an explicit one-element tuple; the previous `(15)` was just the
# integer 15 in parentheses, which only worked because scikit-learn also
# accepts a bare int for a single hidden layer.
# NOTE(review): unlike the other MLP cells in this notebook, the features are
# not standardised here -- confirm that this is intentional.
mlp = MLPClassifier(hidden_layer_sizes=(15,), max_iter=1000, random_state=42)
mlp.fit(X_train, y_train)
# Per-class probabilities, needed for the ROC AUC computation below.
y_proba = mlp.predict_proba(X_test)
# Hard label predictions and the resulting confusion matrix.
y_pred = mlp.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
# Multi-class ROC AUC: one-vs-one pairs, macro-averaged.
auc_ovo_macro = roc_auc_score(y_test, y_proba, multi_class="ovo", average="macro")
# Per-class recall: correctly predicted samples of a class / total samples of that class.
# NOTE(review): a class with no test samples would give a zero row sum and a
# division by zero here.
recall_per_class = cm.diagonal() / cm.sum(axis=1)
# G-Mean: geometric mean of the per-class recalls.
geometric_mean = gmean(recall_per_class)
# Overall accuracy.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
# Print the classification report.
print("Classification Report:")
print(classification_report(y_test, y_pred))
# Print the confusion matrix.
print("Confusion Matrix:")
print(cm)
print(f"最终的集成分类结果：Recall_Per_Class{recall_per_class}，Gmean：{geometric_mean}，mAUC：{auc_ovo_macro}")

[2796 1433 2854 2166 4553 1712 1985 4194 3597]
Accuracy: 0.55
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.96      0.95       843
           1       0.61      0.57      0.59       450
           2       0.39      0.37      0.38       849
           3       0.67      0.60      0.63       678
           4       0.57      0.70      0.63      1348
           5       0.46      0.40      0.43       514
           6       0.34      0.26      0.29       571
           7       0.46      0.48      0.47      1195
           8       0.45      0.44      0.44      1139

    accuracy                           0.55      7587
   macro avg       0.54      0.53      0.53      7587
weighted avg       0.54      0.55      0.54      7587

Confusion Matrix:
[[812  14   0  16   0   1   0   0   0]
 [  4 256  31   0   0  93  60   2   4]
 [  9  13 313   1  26  43  79  80 285]
 [  5   0   0 409 251   0   0  11   2]
 [  5   0   6 157 948   0   0 204  2