### MLP多层感知机

In [1]:
from utils.dataset_utils import get_classes_indexes_counts
from sklearn.metrics import confusion_matrix
# 导入必要的库
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
import scipy.io as sio  # 从.mat文件中读取数据集

# Load the Satellite dataset from the .mat file: 'X' holds the features and
# the first column of 'Y' is used as the label vector.
mat_data = sio.loadmat('../data/dataset/Satellite.mat')
X, y = mat_data['X'], mat_data['Y'][:, 0]

# Hold out 30% of the samples as the test split (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=100
)

# Print the `counts` value the project helper returns for the test labels.
classes, counts = get_classes_indexes_counts(y_test)
print(counts)

# Standardize the features: statistics are fitted on the training split only
# and then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Build and train an MLP with two hidden layers (10 and 20 units).
mlp = MLPClassifier(hidden_layer_sizes=(10, 20), max_iter=1000, random_state=42)
mlp.fit(X_train, y_train)

# Hard label predictions and per-class probabilities on the test split.
y_pred = mlp.predict(X_test)
index_pred_proba = mlp.predict_proba(X_test)

# Accuracy and the per-class precision/recall/F1 report.
print("准确率:", accuracy_score(y_test, y_pred))
print("\n分类报告:\n", classification_report(y_test, y_pred))

# Confusion matrix followed by: row sums (samples per true class), the
# diagonal (correct predictions per class), the total number of correct
# predictions, and the shape of the probability matrix.
print("Confusion Matrix:")
cm = confusion_matrix(y_test, y_pred)
for stat in (cm, cm.sum(axis=1), cm.diagonal(), cm.trace(), index_pred_proba.shape):
    print(stat)

[423 227 423 187 228 443]
准确率: 0.8990160538581046

分类报告:
               precision    recall  f1-score   support

           0       0.97      0.97      0.97       423
           1       0.96      0.98      0.97       227
           2       0.89      0.92      0.90       423
           3       0.71      0.64      0.68       187
           4       0.88      0.94      0.91       228
           5       0.89      0.86      0.87       443

    accuracy                           0.90      1931
   macro avg       0.88      0.88      0.88      1931
weighted avg       0.90      0.90      0.90      1931

Confusion Matrix:
[[411   1  10   0   1   0]
 [  0 222   0   0   5   0]
 [  5   1 390  15   0  12]
 [  3   3  31 120   2  28]
 [  4   2   0   2 214   6]
 [  1   3   9  31  20 379]]
[423 227 423 187 228 443]
[411 222 390 120 214 379]
1736
(1931, 6)


In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
import numpy as np

# Load the iris dataset: feature matrix, integer targets and class names.
iris = load_iris()
X, y = iris.data, iris.target
class_names = iris.target_names

# Stratified 70/30 train/test split (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Standardize the features using statistics from the training split only.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train an MLP with two hidden layers of 10 units and predict the test split.
mlp = MLPClassifier(hidden_layer_sizes=(10, 10), max_iter=1000, random_state=42)
y_pred = mlp.fit(X_train, y_train).predict(X_test)

# Confusion matrix: rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred)

# Per-class summary: test-set size, correct predictions, wrong predictions.
print("类别信息:")
for i, class_name in enumerate(class_names):
    total_count = (y_test == i).sum()
    correct_count = cm[i, i]
    incorrect_count = total_count - correct_count
    print(f"类别 {class_name}:")
    print(f" - 测试集数量: {total_count}")
    print(f" - 预测正确数量: {correct_count}")
    print(f" - 预测错误数量: {incorrect_count}")

### k-fold交叉验证

In [None]:
from sklearn.datasets import load_iris
from sklearn.metrics import classification_report
# train_test_split is used by the hold-out comparison below; importing it here
# keeps the cell runnable without relying on names leaked from earlier cells.
from sklearn.model_selection import StratifiedKFold, cross_val_predict, train_test_split
from sklearn.neural_network import MLPClassifier

# 1. Load the iris dataset.
data = load_iris()
X, y = data.data, data.target

# 2. MLP (one hidden layer of 40 units) evaluated with cross-validation.
mlp_1 = MLPClassifier(hidden_layer_sizes=(40,), max_iter=200, random_state=42)

# 3. Stratified, shuffled 5-fold cross-validation with a fixed seed.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 4. Out-of-fold predictions: each sample is predicted by the model trained on
#    the folds that do not contain it.
y_pred = cross_val_predict(mlp_1, X, y, cv=cv)

# 5. Classification report over the whole dataset (out-of-fold predictions).
print("Classification Report:\n")
print(classification_report(y, y_pred, target_names=data.target_names))

# Comparison: the same architecture trained once on a single 70/30 hold-out split.
mlp_2 = MLPClassifier(hidden_layer_sizes=(40,), max_iter=200, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
mlp_2.fit(X_train, y_train)

# Predict the held-out samples.
y_pred = mlp_2.predict(X_test)

# Classification report for the hold-out split only.
print(classification_report(y_test, y_pred, target_names=data.target_names))

### Nos下训练MLP

In [3]:
from scipy.stats import gmean
from utils.dataset_utils import get_classes_indexes_counts
from sklearn.metrics import confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
import scipy.io as sio  # 从.mat文件中读取数据集

# Load the Satellite dataset from the .mat file: 'X' holds the features and
# the first column of 'Y' is used as the label vector.
mat_data = sio.loadmat('../data/dataset/Satellite.mat')
X = mat_data['X']
y = mat_data['Y'][:, 0]

# `counts` is computed by the project helper on the full label vector
# (before splitting) and printed below.
classes, counts = get_classes_indexes_counts(y)

# Hold out 30% of the samples as the test split (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=100)

print(counts)

# Build and train an MLP with a single hidden layer of 20 units.
# NOTE: `(20,)` is a one-element tuple; the previous `(20)` was a plain int
# wrapped in parentheses. scikit-learn treats both the same way, so results
# are unchanged, but the tuple matches the documented array-like form.
mlp = MLPClassifier(hidden_layer_sizes=(20,), max_iter=1000, random_state=42)
mlp.fit(X_train, y_train)

# Class probabilities (needed for AUC) and hard label predictions.
y_proba = mlp.predict_proba(X_test)
y_pred = mlp.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

# Multiclass ROC AUC: one-vs-one, macro-averaged.
auc_ovo_macro = roc_auc_score(y_test, y_proba, multi_class="ovo", average="macro")

# Per-class recall = correct predictions of the class / true samples of the class
# (confusion-matrix diagonal divided by its row sums).
recall_per_class = cm.diagonal() / cm.sum(axis=1)

# G-Mean: geometric mean of the per-class recalls.
geometric_mean = gmean(recall_per_class)

# Overall accuracy.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Per-class precision/recall/F1 report.
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Confusion matrix and the summary metrics line.
print("Confusion Matrix:")
print(cm)
print(f"最终的集成分类结果：Recall_Per_Class{recall_per_class}，Gmean：{geometric_mean}，mAUC：{auc_ovo_macro}")

[1533  703 1358  626  707 1508]
Accuracy: 0.86
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.97      0.96       423
           1       0.94      0.98      0.96       227
           2       0.79      0.92      0.85       423
           3       0.70      0.37      0.48       187
           4       0.85      0.84      0.85       228
           5       0.84      0.84      0.84       443

    accuracy                           0.86      1931
   macro avg       0.85      0.82      0.82      1931
weighted avg       0.85      0.86      0.85      1931

Confusion Matrix:
[[412   1   7   0   3   0]
 [  0 222   0   0   5   0]
 [  3   0 391  17   3   9]
 [  3   2  63  69   3  47]
 [ 14   5   0   2 192  15]
 [  0   5  37  11  19 371]]
最终的集成分类结果：Recall_Per_Class[0.97399527 0.97797357 0.92434988 0.36898396 0.84210526 0.83747178]，Gmean：0.7822472905006482，mAUC：0.9755285901893908


### 集成分类器

In [ ]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Load the iris dataset.
data = load_iris()
X, y = data.data, data.target

# 70/30 train/test split (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Three MLP base classifiers that differ in depth and random seed.
mlp1 = MLPClassifier(hidden_layer_sizes=(10,), max_iter=500, random_state=42)          # one hidden layer
mlp2 = MLPClassifier(hidden_layer_sizes=(20, 10), max_iter=500, random_state=43)       # two hidden layers
mlp3 = MLPClassifier(hidden_layer_sizes=(30, 20, 10), max_iter=500, random_state=44)   # three hidden layers

# Soft-voting ensemble over the three base classifiers.
named_estimators = [('mlp1', mlp1), ('mlp2', mlp2), ('mlp3', mlp3)]
voting_clf = VotingClassifier(estimators=named_estimators, voting='soft')

# Train the ensemble, then predict the test split.
y_pred = voting_clf.fit(X_train, y_train).predict(X_test)

# Report the ensemble's test accuracy.
accuracy = accuracy_score(y_test, y_pred)
print(f"集成多个 MLP 分类器后的准确率: {accuracy:.2f}")
