In [1]:
from utils.excel_utils import save_to_excel
from machine_learning.sampling.sample import sample_dataset
from sklearn.neural_network import MLPClassifier
from instance_selection.parameter.parameter import *  # 导入参数的设定
from sklearn.model_selection import train_test_split
import scipy.io as sio  # 从.mat文件中读取数据集
import numpy as np
import warnings
from sklearn.base import clone

warnings.filterwarnings("ignore")  # silence every warning for the whole run

# Dataset configuration object; this script reads its DATASETNAME, HIDDEN_SIZE,
# MAX_ITER and LEARNING_RATE attributes.
DATASET = Automobile
datasetname = DATASET.DATASETNAME.split('.')[0]  # file name without its extension

# Load the dataset from the .mat file.
mat_data = sio.loadmat(IMBALANCED_DATASET_PATH + DATASET.DATASETNAME)
x = mat_data['X']
y = mat_data['Y'][:, 0]  # mat_data['Y'] has shape [n, 1]; [:, 0] flattens it to [n,]

num_run = 30  # number of repetitions of the experiment

gmean_results = []  # one row per run, one column per entry of `columns`
mauc_results = []  # same layout as gmean_results

# Template classifier; every evaluation below works on a fresh clone of it.
model = MLPClassifier(hidden_layer_sizes=(DATASET.HIDDEN_SIZE,), max_iter=DATASET.MAX_ITER,
                      random_state=RANDOM_SEED, learning_rate_init=DATASET.LEARNING_RATE)

# Order of the sampling methods in the printed lists and in the Excel sheet.
columns = ['NOS', 'RUS', 'ROS', 'SMOTE', 'BorderlineSMOTE', 'KMeansSMOTE']

# Order in which the methods are actually evaluated within one run.
# 'ADASYN' is currently left out of the experiment.
evaluation_order = ('ROS', 'RUS', 'NOS', 'SMOTE', 'BorderlineSMOTE', 'KMeansSMOTE')

# The split uses a fixed seed, so it is identical for every run; compute it once
# instead of repeating the same work inside the loop.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, stratify=y,
                                                    random_state=RANDOM_SEED + 1)

# Dedicated, seeded generator for the per-run seeds: the seed sequence (and thus the
# whole experiment) is the same on every execution, and it does not depend on or
# disturb NumPy's global random state.
seed_rng = np.random.RandomState(RANDOM_SEED)

for i in range(num_run):
    # All methods of one run share the same seed; only the seed changes between runs.
    random_state = RANDOM_SEED + seed_rng.randint(1, 1000)

    # method name -> (gmean, mauc) as returned by sample_dataset
    scores = {}
    for method in evaluation_order:
        scores[method] = sample_dataset(clone(model), x_train, x_test, y_train, y_test,
                                        random_state, method=method)

    # Re-order the scores so they line up with `columns`.
    gmean = [scores[name][0] for name in columns]
    mauc = [scores[name][1] for name in columns]
    gmean_results.append(gmean)
    mauc_results.append(mauc)

    # Report the results of this run.
    print(f'第{i + 1}次运行:')
    print(f'Gmean: {gmean}')
    print(f'mAUC: {mauc}')

# NOTE(review): the output directory is a hard-coded absolute path on one machine.
save_path = 'C:/Users/zsc/Desktop/Sampling/' + datasetname + '/'
filename = datasetname
save_to_excel(save_path, filename, columns, gmean_results, mauc_results)

第1次运行:
Gmean: [0.653347, 0.671031, 0.616817, 0.0, 0.629953, 0.723626]
mAUC: [0.897709, 0.865977, 0.891591, 0.873238, 0.893726, 0.892933]
第2次运行:
Gmean: [0.653347, 0.520668, 0.616817, 0.629953, 0.0, 0.639724]
mAUC: [0.897709, 0.784018, 0.907285, 0.897284, 0.863804, 0.883947]
第3次运行:
Gmean: [0.653347, 0.495145, 0.639724, 0.629953, 0.59473, 0.708537]
mAUC: [0.897709, 0.808447, 0.896485, 0.885743, 0.868169, 0.903297]
第4次运行:
Gmean: [0.653347, 0.54089, 0.0, 0.606176, 0.0, 0.708537]
mAUC: [0.897709, 0.779162, 0.8797, 0.904752, 0.879582, 0.892593]
第5次运行:
Gmean: [0.653347, 0.495145, 0.589895, 0.59473, 0.587773, 0.589895]
mAUC: [0.897709, 0.806524, 0.904034, 0.88373, 0.897869, 0.878656]
第6次运行:
Gmean: [0.653347, 0.543946, 0.62483, 0.0, 0.0, 0.616817]
mAUC: [0.897709, 0.815571, 0.90446, 0.887434, 0.874792, 0.880641]
第7次运行:
Gmean: [0.653347, 0.59473, 0.606176, 0.629953, 0.598091, 0.629953]
mAUC: [0.897709, 0.800959, 0.885936, 0.881203, 0.889975, 0.892952]
第8次运行:
Gmean: [0.653347, 0.683166, 0.606176, 

'C:/Users/zsc/Desktop/Sampling/Automobile/Automobile.xlsx'