In [4]:
from utils.excel_utils import save_to_excel
from machine_learning.sampling.sample import sample_dataset
from sklearn.neural_network import MLPClassifier
from instance_selection.parameter.parameter import *  # 导入参数的设定
from sklearn.model_selection import train_test_split
import scipy.io as sio  # 从.mat文件中读取数据集
import numpy as np
import warnings
from sklearn.base import clone

# Suppress all Python warnings for the remainder of the session.
warnings.filterwarnings("ignore")

# Dataset descriptor (provided by the star-import of the parameter module);
# it carries the file name and the MLP hyper-parameters read below.
DATASET = Splice
# File name up to the first '.', used later to name the output folder/file.
datasetname = DATASET.DATASETNAME.partition('.')[0]

# Load the .mat file and pull out features and labels.
mat_data = sio.loadmat(IMBALANCED_DATASET_PATH + DATASET.DATASETNAME)
x = mat_data['X']
# Per the original note, mat_data['Y'] is stored as [n, 1]; taking column 0 yields a 1-D [n,] label vector.
y = mat_data['Y'][:, 0]

# Number of repeated runs of the experiment.
num_run = 30

# One row per run is appended to each list; every row holds one score per sampling method.
gmean_results, mauc_results = [], []

# Base classifier; the experiment loop works on clone(model) so this instance is never fitted.
model = MLPClassifier(
    hidden_layer_sizes=(DATASET.HIDDEN_SIZE,),
    max_iter=DATASET.MAX_ITER,
    random_state=RANDOM_SEED + 1,
    learning_rate_init=DATASET.LEARNING_RATE,
)

# Sampling strategies evaluated in every run. The order here is the order in which
# the scores are stored in each result row, so it must match the column headers
# used when the results are exported.
# ADASYN was commented out in the original experiment and is therefore not listed;
# presumably adding 'ADASYN' here (and to the export columns) would re-enable it -- TODO confirm.
sampling_methods = ['NOS', 'RUS', 'ROS', 'SMOTE', 'BorderlineSMOTE', 'KMeansSMOTE']

# Seeded generator for the sampler seeds. The previous code drew them from the
# unseeded global np.random state, so two executions of the notebook produced
# different numbers even though RANDOM_SEED was fixed.
seed_rng = np.random.default_rng(RANDOM_SEED)

for i in range(num_run):
    # Per-run base seed; each sampling method below adds its own extra offset.
    random_state = RANDOM_SEED + int(seed_rng.integers(1, 1000))

    # Stratified 70/30 split.
    # NOTE(review): the split seed is the constant RANDOM_SEED + 1, so every run uses
    # the same train/test partition (and the model seed is fixed too), which is why
    # the 'NOS' scores are identical across runs. If a different split per run was
    # intended, pass `random_state` here instead -- confirm with the author.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.3, stratify=y, random_state=RANDOM_SEED + 1)

    gmean = []
    mauc = []
    for method in sampling_methods:
        # A fresh, unfitted copy of the classifier and a distinct seed for every method.
        method_seed = random_state + int(seed_rng.integers(1, 10000))
        method_gmean, method_mauc = sample_dataset(clone(model), x_train, x_test, y_train, y_test,
                                                   method_seed, method=method)
        gmean.append(method_gmean)
        mauc.append(method_mauc)

    gmean_results.append(gmean)
    mauc_results.append(mauc)

    # Report the scores of this run.
    print(f'第{i + 1}次运行:')
    print(f'Gmean: {gmean}')
    print(f'mAUC: {mauc}')

# Column headers for the exported sheets, in the same order as the scores inside each result row.
columns = ['NOS', 'RUS', 'ROS', 'SMOTE', 'BorderlineSMOTE', 'KMeansSMOTE']

# Results go to a per-dataset folder; both the folder and the file are named after the dataset.
filename = datasetname
save_path = f'C:/Users/zsc/Desktop/Sampling/{datasetname}/'
# Kept as the cell's last expression so the value returned by save_to_excel is displayed.
save_to_excel(save_path, filename, columns, gmean_results, mauc_results)

第1次运行:
Gmean: [0.842028, 0.796123, 0.836649, 0.8377, 0.829571, 0.828935]
mAUC: [0.951893, 0.926352, 0.94597, 0.939076, 0.937288, 0.949315]
第2次运行:
Gmean: [0.842028, 0.816059, 0.841628, 0.845186, 0.829061, 0.840476]
mAUC: [0.951893, 0.939198, 0.940448, 0.946353, 0.938172, 0.94766]
第3次运行:
Gmean: [0.842028, 0.804349, 0.854496, 0.833981, 0.822085, 0.824084]
mAUC: [0.951893, 0.933672, 0.952424, 0.941889, 0.944194, 0.944441]
第4次运行:
Gmean: [0.842028, 0.814999, 0.827174, 0.838201, 0.814816, 0.825114]
mAUC: [0.951893, 0.943781, 0.94726, 0.94715, 0.931788, 0.938756]
第5次运行:
Gmean: [0.842028, 0.813679, 0.837527, 0.828748, 0.81513, 0.818871]
mAUC: [0.951893, 0.936983, 0.942833, 0.94207, 0.940104, 0.936007]
第6次运行:
Gmean: [0.842028, 0.821814, 0.818952, 0.818316, 0.828009, 0.844903]
mAUC: [0.951893, 0.937796, 0.940653, 0.942569, 0.935072, 0.950669]
第7次运行:
Gmean: [0.842028, 0.84867, 0.825228, 0.837975, 0.829568, 0.80308]
mAUC: [0.951893, 0.941169, 0.941975, 0.944715, 0.937137, 0.928654]
第8次运行:
Gmean: [0

'C:/Users/zsc/Desktop/Sampling/Splice/Splice.xlsx'

In [5]:
# Print the per-method mean over all runs: first line G-mean, second line mAUC.
print(
    np.asarray(gmean_results).mean(axis=0),
    np.asarray(mauc_results).mean(axis=0),
    sep='\n',
)

[0.842028   0.82080003 0.83308793 0.83251657 0.82969827 0.82592247]
[0.951893   0.93730543 0.94227907 0.94354103 0.94013443 0.9411319 ]


In [6]:
# Export the collected scores again to the per-dataset folder (same target as the first export).
filename = datasetname
save_path = f'C:/Users/zsc/Desktop/Sampling/{datasetname}/'
# Kept as the cell's last expression so the value returned by save_to_excel is displayed.
save_to_excel(save_path, filename, columns, gmean_results, mauc_results)

Excel 文件已保存至: C:/Users/zsc/Desktop/Sampling/Splice/Splice.xlsx


'C:/Users/zsc/Desktop/Sampling/Splice/Splice.xlsx'