### MLP多层感知机

In [None]:
from utils.dataset_utils import get_classes_indexes_counts
from sklearn.metrics import confusion_matrix
# 导入必要的库
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
import scipy.io as sio  # 从.mat文件中读取数据集

# Load the Satellite dataset from its MATLAB file.
mat_data = sio.loadmat('../data/dataset/Satellite.mat')
X, y = mat_data['X'], mat_data['Y'][:, 0]  # features, first column of the label array

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=100,
)

# Project helper applied to the test labels.
# NOTE(review): presumably returns per-class index groups and per-class sample
# counts -- confirm against utils.dataset_utils.
classes, counts = get_classes_indexes_counts(y_test)
print(counts)

# Standardize the features; the scaler is fitted on the training split only and
# then reused for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train an MLP with two hidden layers (10 and 20 units).
mlp = MLPClassifier(
    hidden_layer_sizes=(10, 20),
    max_iter=1000,
    random_state=42,
).fit(X_train, y_train)

# Class probabilities and hard predictions for the test split.
index_pred_proba = mlp.predict_proba(X_test)
y_pred = mlp.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

# Accuracy followed by the per-class precision / recall / F1 table.
print("准确率:", accuracy_score(y_test, y_pred))
print("\n分类报告:\n", classification_report(y_test, y_pred))

# Confusion matrix, its row sums (samples per true class), its diagonal
# (correct predictions per class), the total number of correct predictions,
# and the shape of the probability matrix -- one item per line.
print("Confusion Matrix:")
print(
    cm,
    cm.sum(axis=1),
    cm.diagonal(),
    cm.diagonal().sum(),
    index_pred_proba.shape,
    sep="\n",
)

# Leave the probability matrix as the cell's displayed value.
index_pred_proba

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the iris dataset.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

print(X_test.shape)
print(y_test.shape)

# Standardize the features; the scaler is fitted on the training split only and
# then reused for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create and train an MLP with a single hidden layer of 100 units.
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42)
mlp.fit(X_train, y_train)

# Predict the held-out samples.
y_pred = mlp.predict(X_test)

# Evaluate and actually show the results: previously `accuracy` and `report`
# were computed but never printed, so the cell produced no evaluation output.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=iris.target_names)
print("Accuracy:", accuracy)
print("Classification Report:\n")
print(report)

### k-fold 交叉验证

In [None]:
from sklearn.datasets import load_iris
from sklearn.neural_network import MLPClassifier
# train_test_split is used further down in this cell; importing it here keeps the
# cell runnable on its own instead of relying on an import made by another cell.
from sklearn.model_selection import cross_val_predict, StratifiedKFold, train_test_split
from sklearn.metrics import classification_report

# 1. Load the iris dataset.
data = load_iris()
X, y = data.data, data.target

# 2. Hyperparameters shared by both models so the cross-validated and the
#    hold-out evaluations compare identically configured classifiers.
mlp_params = dict(hidden_layer_sizes=(40,), max_iter=200, random_state=42)
mlp_1 = MLPClassifier(**mlp_params)

# 3. Configure stratified, shuffled 5-fold cross-validation.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 4. Collect out-of-fold predictions: each sample is predicted by the model
#    trained on the folds that do not contain it.
y_pred = cross_val_predict(mlp_1, X, y, cv=cv)

# 5. Classification report over the out-of-fold predictions for all samples.
print("Classification Report:\n")
print(classification_report(y, y_pred, target_names=data.target_names))

# Baseline for comparison: the same model evaluated on a single 70/30 hold-out split.
mlp_2 = MLPClassifier(**mlp_params)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
mlp_2.fit(X_train, y_train)

# Predict the held-out samples.
y_pred = mlp_2.predict(X_test)

# Classification report for the hold-out split.
print(classification_report(y_test, y_pred, target_names=data.target_names))

In [2]:
from scipy.io import savemat
from scipy.stats import gmean
import numpy as np
from utils.dataset_utils import get_classes_indexes_counts
from sklearn.metrics import confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
import scipy.io as sio  # 从.mat文件中读取数据集

# Load the Chess4 dataset from its MATLAB file.
mat_data = sio.loadmat('../data/dataset/Chess4.mat')
X = mat_data['X']  # features
y = mat_data['Y'][:, 0]  # first column of the label array, as a 1-D vector

# Project helper applied to the full label vector.
# NOTE(review): presumably returns per-class index groups and per-class sample
# counts -- confirm against utils.dataset_utils.
classes, counts = get_classes_indexes_counts(y)

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=100)

# Disabled code, kept for reference: it stacks the samples of a hand-picked
# subset of classes, re-encodes their labels and saves them as Chess4.mat.
# X_save=X[np.array(classes[0]),:]
# Y_save=y[np.array(classes[0]),:]

# index = [1, 2, 3, 6, 7, 12, 13, 15]
# for i in index:
#     X_save = np.vstack((X_save,X[np.array(classes[i]),:]))
#     Y_save = np.vstack((Y_save,y[np.array(classes[i]),:]))
# # Save as a .mat file
# label_encoder = LabelEncoder()
# y_encoded = label_encoder.fit_transform(Y_save)
# y_encoded = y_encoded.astype(int)
# data_dict = {'X': X_save, 'Y': y_encoded.reshape(-1, 1)}  # stored as a dict
# savemat('Chess4.mat', data_dict)

print(counts)
# Size of every class group; indexed access is kept because the container type
# returned by the helper is not visible here.
classccc = [len(classes[i]) for i in range(len(classes))]
print(classccc)

# Build and train the MLP. `(100,)` is a one-element tuple (a single hidden
# layer of 100 units); the previous `(100)` was just the integer 100.
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)
mlp.fit(X_train, y_train)

# Class probabilities (columns follow mlp.classes_) and hard predictions.
y_proba = mlp.predict_proba(X_test)
y_pred = mlp.predict(X_test)

# Pin the label order to the classes the model was trained on, so the matrix
# rows/columns line up with the probability columns even if the unstratified
# split left some class out of the test set.
cm = confusion_matrix(y_test, y_pred, labels=mlp.classes_)

# ROC AUC (one-vs-one, macro average). Passing `labels` tells the metric which
# class each probability column belongs to; without it the call raises when a
# trained class is missing from y_test.
auc_ovo_macro = roc_auc_score(
    y_test, y_proba, multi_class="ovo", average="macro", labels=mlp.classes_
)

# Per-class recall = correct predictions of the class / test samples of the class.
# Classes with no test samples get NaN instead of triggering a 0/0 warning.
support = cm.sum(axis=1)
has_support = support > 0
recall_per_class = np.divide(
    cm.diagonal(),
    support,
    out=np.full(support.shape, np.nan),
    where=has_support,
)

# G-mean: geometric mean of the recalls of the classes present in the test set.
geometric_mean = gmean(recall_per_class[has_support])

# Overall accuracy.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Per-class precision / recall / F1 table.
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Confusion matrix.
print("Confusion Matrix:")
print(cm)

# NOTE(review): the message text below refers to an "ensemble" result although
# this cell evaluates a single MLP -- confirm the wording is intended.
print(f"最终的集成分类结果：Recall_Per_Class{recall_per_class}，Gmean：{geometric_mean}，mAUC：{auc_ovo_macro}")

[2796 1433 2854 2166 4553 1712 1985 4194 3597]
[2796, 1433, 2854, 2166, 4553, 1712, 1985, 4194, 3597]
Accuracy: 0.67
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       843
           1       0.76      0.67      0.71       450
           2       0.58      0.47      0.52       849
           3       0.78      0.69      0.73       678
           4       0.67      0.80      0.73      1348
           5       0.59      0.60      0.59       514
           6       0.55      0.54      0.54       571
           7       0.62      0.60      0.61      1195
           8       0.58      0.59      0.59      1139

    accuracy                           0.67      7587
   macro avg       0.68      0.66      0.67      7587
weighted avg       0.67      0.67      0.67      7587

Confusion Matrix:
[[ 835    4    0    3    1    0    0    0    0]
 [   2  301   14    0    0  107   24    1    1]
 [   5    6  400    1    4   28  109   44