# SelfPacedEnsembleClassifier

In [1]:
import numpy as np
from sklearn.neural_network import MLPClassifier
from metrics.metrics import calculate_gmean_mauc
from sklearn.model_selection import train_test_split
from utils.dataset_utils import get_distribution
from imbens.ensemble import SelfPacedEnsembleClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from instance_selection.parameter.parameter import *  # 导入参数的设定
import scipy.io as sio  # 从.mat文件中读取数据集
from sklearn.base import clone
import warnings

# Silence every warning raised by the libraries for the rest of the notebook.
warnings.filterwarnings("ignore")

# `Splice` is a configuration object that carries both the dataset file name
# and the hyper-parameters (hidden size, iteration budget, learning rate)
# consumed further down.
DATASET = Splice
datasetname = DATASET.DATASETNAME.partition('.')[0]  # file name up to the first '.'

# Read the .mat file and pull out the feature matrix and the label vector.
mat_data = sio.loadmat(IMBALANCED_DATASET_PATH + DATASET.DATASETNAME)
x = mat_data['X']
y = mat_data['Y'][:, 0]  # 'Y' is stored as [n, 1]; taking column 0 yields shape [n,]

# Stratified 70/30 hold-out split with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42 + 1,
)

# Base learner template; it is cloned before being handed to each ensemble.
model = MLPClassifier(
    hidden_layer_sizes=(DATASET.HIDDEN_SIZE,),
    max_iter=DATASET.MAX_ITER,
    learning_rate_init=DATASET.LEARNING_RATE,
    random_state=RANDOM_SEED + 1,
)

# Report the class distribution of the full dataset and of each split.
# get_distribution returns three values; only the third one (the counts)
# is printed here.
unique_elements_all, classes_all, counts_all = get_distribution(y)
unique_elements_train, classes_train, counts_train = get_distribution(y_train)
unique_elements_test, classes_test, counts_test = get_distribution(y_test)

print(
    f'{datasetname} distribution: {counts_all}',
    f'trainset distribution: {counts_train}',
    f'testset distribution: {counts_test}',
    sep='\n',
)
# Repeat the fit/evaluate cycle `num_run` times, each time with a differently
# seeded SelfPacedEnsembleClassifier, and collect G-mean / MAUC per run.
num_run = 50
gmean_results = []  # one single-element list [gmean] per run
mauc_results = []   # one single-element list [mauc] per run
results = []        # one [gmean, mauc] pair per run, used for the average below

# Dedicated, seeded generator for the per-run ensemble seeds. Drawing them from
# the global NumPy state (which this notebook never seeds) meant every kernel
# restart produced a different sequence of runs, so the printed and exported
# numbers could not be regenerated. Re-running this cell now restarts the same
# seed sequence.
seed_rng = np.random.RandomState(RANDOM_SEED)

for i in range(num_run):
    run_seed = int(seed_rng.randint(1, 10000))  # same range as before: [1, 10000)
    clf = SelfPacedEnsembleClassifier(estimator=clone(model), random_state=run_seed, n_estimators=30)
    clf.fit(x_train, y_train)
    # Hard labels are not used inside the loop; they are kept because the
    # commented-out accuracy / confusion-matrix diagnostics refer to `y_pred`
    # (which, after the loop, holds the predictions of the last run).
    y_pred = clf.predict(x_test)
    y_pred_proba = clf.predict_proba(x_test)
    gmean, mauc, recall_per_class = calculate_gmean_mauc(y_pred_proba, y_test)
    # Record and print gmean, mauc and the per-class recall of this run.
    results.append([gmean, mauc])
    gmean_results.append([gmean])
    mauc_results.append([mauc])
    print(f'第{i + 1}次运行:gmean: {gmean:.4f}, mauc: {mauc:.4f}, recall_per_class: {recall_per_class}')
# Print the column-wise mean of `results`, i.e. [mean gmean, mean mauc].
print(f'平均值{np.mean(results, axis=0)}')
# 输出准确率
# accuracy = accuracy_score(y_test, y_pred)
# print(f'模型准确率: {accuracy:.4f}')

# 输出混淆矩阵
# cm = confusion_matrix(y_test, y_pred)
# print("\n混淆矩阵：\n", cm)

# 输出分类报告
# report = classification_report(y_test, y_pred)
# print("\n分类报告：\n", report)



Splice distribution: [ 767  768 1655]
trainset distribution: [ 537  538 1158]
testset distribution: [230 230 497]
第1次运行:gmean: 0.5334, mauc: 0.7643, recall_per_class: [0.75217391 0.82173913 0.24547284]
第2次运行:gmean: 0.4649, mauc: 0.7628, recall_per_class: [0.73478261 0.83913043 0.16297787]
第3次运行:gmean: 0.7312, mauc: 0.8995, recall_per_class: [0.88695652 0.88695652 0.49698189]
第4次运行:gmean: 0.5694, mauc: 0.8252, recall_per_class: [0.76086957 0.84347826 0.28772636]
第5次运行:gmean: 0.0000, mauc: 0.5000, recall_per_class: [1. 0. 0.]
第6次运行:gmean: 0.0000, mauc: 0.7833, recall_per_class: [0.93043478 0.83913043 0.        ]
第7次运行:gmean: 0.7601, mauc: 0.8957, recall_per_class: [0.93043478 0.82608696 0.57142857]
第8次运行:gmean: 0.0000, mauc: 0.7965, recall_per_class: [0.86956522 0.88695652 0.        ]
第9次运行:gmean: 0.0000, mauc: 0.5000, recall_per_class: [0. 1. 0.]
第10次运行:gmean: 0.7234, mauc: 0.8532, recall_per_class: [0.93043478 0.66956522 0.60764588]
第11次运行:gmean: 0.0000, mauc: 0.5000, recall_per_class:

In [2]:
from utils.excel_utils import save_to_excel

# Export the per-run G-mean and MAUC lists through save_to_excel.
# `columns` is forwarded unchanged; presumably it labels the result column
# with the method name -- verify against save_to_excel.
columns = ['SPE']
filename = datasetname
save_path = f'C:/Users/zsc/Desktop/SPE/{datasetname}/'  # per-dataset output folder

# Keep the call as the last expression so the cell displays its return value.
save_to_excel(save_path, filename, columns, gmean_results, mauc_results)

Excel 文件已保存至: C:/Users/zsc/Desktop/SPE/Splice/Splice.xlsx


'C:/Users/zsc/Desktop/SPE/Splice/Splice.xlsx'