In [1]:
from utils.excel_utils import save_to_excel
from machine_learning.sampling.sample import sample_dataset
from sklearn.neural_network import MLPClassifier
from instance_selection.parameter.parameter import *  # 导入参数的设定
from sklearn.model_selection import train_test_split
import scipy.io as sio  # 从.mat文件中读取数据集
import numpy as np
import warnings
from sklearn.base import clone

warnings.filterwarnings("ignore")  # silence every warning raised during the run

DATASET = Satellite  # dataset configuration: file name plus the MLP hyper-parameters read below
datasetname = DATASET.DATASETNAME.split('.')[0]  # file name up to its first '.'

# Load the dataset from the .mat file.
mat_data = sio.loadmat(IMBALANCED_DATASET_PATH + DATASET.DATASETNAME)
x = mat_data['X']
y = mat_data['Y'][:, 0]  # mat_data['Y'] has shape [n, 1]; [:, 0] flattens it to [n,]

num_run = 30  # number of repeated runs

# Sampling methods evaluated in every run. The same list is the column order
# of the saved table, so results and headers cannot drift apart.
# ADASYN used to be called here but was commented out; it is left disabled.
columns = ['NOS', 'RUS', 'ROS', 'SMOTE', 'BorderlineSMOTE', 'KMeansSMOTE']

gmean_results = []  # one row per run, one G-mean per entry of `columns`
mauc_results = []   # one row per run, one mAUC per entry of `columns`

# Template classifier; each evaluation receives a fresh, unfitted clone of it.
model = MLPClassifier(hidden_layer_sizes=(DATASET.HIDDEN_SIZE,), max_iter=DATASET.MAX_ITER,
                      random_state=RANDOM_SEED, learning_rate_init=DATASET.LEARNING_RATE)

# Dedicated generator seeded from RANDOM_SEED: the per-run and per-method seeds
# drawn below are identical on every execution of this cell. Drawing them from
# the unseeded global np.random made the saved table impossible to regenerate.
seed_rng = np.random.default_rng(RANDOM_SEED)

for run_idx in range(num_run):
    # Base seed of this run; int() keeps it a plain Python integer.
    random_state = RANDOM_SEED + int(seed_rng.integers(1, 1000))

    # Stratified 70/30 split.
    # NOTE(review): the split is seeded with the constant RANDOM_SEED, not with
    # `random_state`, so every run evaluates on the same partition (the NOS
    # scores printed by this cell are identical across runs) - confirm intended.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, stratify=y,
                                                        random_state=RANDOM_SEED)

    gmean = []
    mauc = []
    for method in columns:
        # Every method gets its own seed, offset from the run's base seed.
        method_seed = random_state + int(seed_rng.integers(1, 10000))
        method_gmean, method_mauc = sample_dataset(clone(model), x_train, x_test, y_train, y_test,
                                                   method_seed, method=method)
        gmean.append(method_gmean)
        mauc.append(method_mauc)

    gmean_results.append(gmean)
    mauc_results.append(mauc)

    # Report the scores of this run.
    print(f'第{run_idx + 1}次运行:')
    print(f'Gmean: {gmean}')
    print(f'mAUC: {mauc}')

# Hand both result tables to save_to_excel; the call stays the last expression
# of the cell so that its return value is displayed.
save_path = 'C:/Users/zsc/Desktop/Sampling/' + datasetname + '/'
filename = datasetname
save_to_excel(save_path, filename, columns, gmean_results, mauc_results)

第1次运行:
Gmean: [0.817886, 0.851755, 0.844543, 0.847962, 0.83701, 0.839301]
mAUC: [0.980246, 0.978063, 0.979926, 0.978626, 0.974184, 0.979717]
第2次运行:
Gmean: [0.817886, 0.84284, 0.838384, 0.84511, 0.850273, 0.856817]
mAUC: [0.980246, 0.97899, 0.97925, 0.978533, 0.975828, 0.980869]
第3次运行:
Gmean: [0.817886, 0.829832, 0.849452, 0.861016, 0.808377, 0.847005]
mAUC: [0.980246, 0.975335, 0.977496, 0.979958, 0.978399, 0.980484]
第4次运行:
Gmean: [0.817886, 0.848951, 0.812945, 0.826817, 0.812791, 0.846882]
mAUC: [0.980246, 0.979974, 0.972677, 0.976793, 0.9741, 0.979334]
第5次运行:
Gmean: [0.817886, 0.828542, 0.857995, 0.860619, 0.848941, 0.849538]
mAUC: [0.980246, 0.977609, 0.977811, 0.981106, 0.977013, 0.97805]
第6次运行:
Gmean: [0.817886, 0.84955, 0.84367, 0.858097, 0.848816, 0.829101]
mAUC: [0.980246, 0.977199, 0.977574, 0.979922, 0.977816, 0.974779]
第7次运行:
Gmean: [0.817886, 0.846432, 0.834178, 0.860475, 0.834233, 0.845619]
mAUC: [0.980246, 0.9789, 0.976577, 0.980716, 0.978956, 0.977563]
第8次运行:
Gmean: [0.8

'C:/Users/zsc/Desktop/Sampling/Satellite/Satellite.xlsx'

In [2]:
# Print the column-wise mean over all runs, first for G-mean and then for mAUC.
for run_table in (gmean_results, mauc_results):
    print(np.asarray(run_table).mean(axis=0))

[0.817886   0.83571777 0.8470244  0.84790287 0.84132847 0.8511772 ]
[0.980246   0.9776551  0.9787872  0.9789425  0.97755487 0.97912957]


In [3]:
# Save the G-mean and mAUC tables through save_to_excel. The call is kept as
# the last expression of the cell so that its return value is displayed.
filename = datasetname
save_path = ''.join(('C:/Users/zsc/Desktop/Sampling/', datasetname, '/'))
save_to_excel(save_path, filename, columns, gmean_results, mauc_results)

Excel 文件已保存至: C:/Users/zsc/Desktop/Sampling/Satellite/Satellite.xlsx


'C:/Users/zsc/Desktop/Sampling/Satellite/Satellite.xlsx'