In [5]:
from utils.excel_utils import save_to_excel
from machine_learning.sampling.sample import sample_dataset
from sklearn.neural_network import MLPClassifier
from instance_selection.parameter.parameter import *  # 导入参数的设定
from sklearn.model_selection import train_test_split
import scipy.io as sio  # 从.mat文件中读取数据集
import numpy as np
import warnings
from sklearn.base import clone

# Suppress every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Dataset descriptor: exposes the data file name plus the MLP hyper-parameters
# read below (HIDDEN_SIZE, MAX_ITER, LEARNING_RATE).
DATASET = Satellite
# Dataset name = file name up to its first '.'; reused later for the output location.
datasetname = DATASET.DATASETNAME.split('.')[0]

# Load the .mat file and pull out the feature matrix and the label vector.
mat_data = sio.loadmat(IMBALANCED_DATASET_PATH + DATASET.DATASETNAME)
# Per the original note, 'Y' is stored as an (n, 1) column; taking column 0
# flattens it to shape (n,).
x, y = mat_data['X'], mat_data['Y'][:, 0]

# Number of repeated evaluation runs.
num_run = 30

# One row of scores per run is appended to each of these lists.
gmean_results, mauc_results = [], []

# Template classifier; each evaluation is expected to work on a clone of it.
model = MLPClassifier(
    hidden_layer_sizes=(DATASET.HIDDEN_SIZE,),
    learning_rate_init=DATASET.LEARNING_RATE,
    max_iter=DATASET.MAX_ITER,
    random_state=RANDOM_SEED,
)

# All per-run seeds are drawn from a dedicated generator seeded with RANDOM_SEED.
# Drawing them from the global, unseeded np.random stream gave different
# resampler seeds on every kernel run, so saved results could not be regenerated.
seed_rng = np.random.RandomState(RANDOM_SEED)

# (method, add_extra_offset) in execution order. Methods flagged True get an
# additional random offset on top of the run seed.
# ADASYN is currently disabled; enabling it also requires a matching column
# when the results are saved.
sampling_plan = (
    ('ROS', False),
    ('RUS', False),
    ('NOS', False),
    ('SMOTE', False),
    ('BorderlineSMOTE', True),
    ('KMeansSMOTE', True),
)

# Order in which the scores of one run are stored and printed.
report_order = ('NOS', 'RUS', 'ROS', 'SMOTE', 'BorderlineSMOTE', 'KMeansSMOTE')

for i in range(num_run):
    # Seed shared by the samplers of this run.
    random_state = RANDOM_SEED + seed_rng.randint(1, 1000)

    # Split the data set (70 % train / 30 % test, stratified on the labels).
    # NOTE(review): the split is seeded with the fixed RANDOM_SEED, not with the
    # per-run `random_state`, so every run sees the same partition and the
    # unsampled 'NOS' score is identical across runs. Kept as-is; pass
    # `random_state=random_state` here if varying splits were intended.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.3, stratify=y, random_state=RANDOM_SEED)

    gmean_by_method, mauc_by_method = {}, {}
    for method, add_extra_offset in sampling_plan:
        method_seed = random_state
        if add_extra_offset:
            method_seed = random_state + seed_rng.randint(1, 10000)
        # A fresh clone per call keeps one evaluation from reusing a fitted model.
        gmean_by_method[method], mauc_by_method[method] = sample_dataset(
            clone(model), x_train, x_test, y_train, y_test, method_seed, method=method)

    gmean = [gmean_by_method[name] for name in report_order]
    mauc = [mauc_by_method[name] for name in report_order]
    gmean_results.append(gmean)
    mauc_results.append(mauc)

    # Report the scores of this run.
    print(f'第{i + 1}次运行:')
    print(f'Gmean: {gmean}')
    print(f'mAUC: {mauc}')

# Column headers, in the same order as the per-run score lists.
columns = ['NOS', 'RUS', 'ROS', 'SMOTE', 'BorderlineSMOTE', 'KMeansSMOTE']

# Results go into a per-dataset folder, in a file named after the dataset.
# NOTE(review): absolute, machine-specific path; consider a configurable
# output directory.
save_path = f'C:/Users/zsc/Desktop/Sampling/{datasetname}/'
filename = datasetname

# Left as the cell's final bare expression so the notebook displays the value
# returned by save_to_excel (presumably the path of the written workbook).
save_to_excel(save_path, filename, columns, gmean_results, mauc_results)

第1次运行:
Gmean: [0.817886, 0.838213, 0.859143, 0.845074, 0.840105, 0.861469]
mAUC: [0.980246, 0.979313, 0.980371, 0.977998, 0.976283, 0.980497]
第2次运行:
Gmean: [0.817886, 0.850023, 0.84415, 0.858879, 0.836024, 0.857852]
mAUC: [0.980246, 0.977671, 0.979846, 0.98082, 0.976615, 0.979682]
第3次运行:
Gmean: [0.817886, 0.848808, 0.859725, 0.855776, 0.860399, 0.846782]
mAUC: [0.980246, 0.978101, 0.981295, 0.980151, 0.980885, 0.979722]
第4次运行:
Gmean: [0.817886, 0.859798, 0.853379, 0.853129, 0.854691, 0.856485]
mAUC: [0.980246, 0.981098, 0.97762, 0.978667, 0.979877, 0.980234]
第5次运行:
Gmean: [0.817886, 0.863259, 0.858892, 0.866001, 0.856061, 0.867128]
mAUC: [0.980246, 0.978883, 0.9809, 0.982327, 0.981511, 0.980584]
第6次运行:
Gmean: [0.817886, 0.84399, 0.837502, 0.856532, 0.838599, 0.843793]
mAUC: [0.980246, 0.979117, 0.978682, 0.980901, 0.975979, 0.979555]
第7次运行:
Gmean: [0.817886, 0.860562, 0.85208, 0.847998, 0.837535, 0.858243]
mAUC: [0.980246, 0.980566, 0.979205, 0.979836, 0.976338, 0.980635]
第8次运行:
Gmean:

'C:/Users/zsc/Desktop/Sampling/Satellite/Satellite.xlsx'

In [6]:
# Print the per-method average over all runs (one value per column),
# first for G-mean, then for mAUC.
print(np.asarray(gmean_results).mean(axis=0))
print(np.asarray(mauc_results).mean(axis=0))

[0.817886   0.84420013 0.84657697 0.85134413 0.84788453 0.85257133]
[0.980246   0.97883863 0.97871043 0.97947473 0.97884577 0.97945147]
