In [11]:
from utils.excel_utils import save_to_excel
from machine_learning.sampling.sample import sample_dataset
from sklearn.neural_network import MLPClassifier
from instance_selection.parameter.parameter import *  # 导入参数的设定
from sklearn.model_selection import train_test_split
import scipy.io as sio  # 从.mat文件中读取数据集
import numpy as np
import warnings
from sklearn.base import clone

# Experiment script: evaluate random under-sampling (method='RUS') with an MLP
# classifier over `num_run` repetitions, print the per-run G-mean / mAUC,
# export them to Excel, and print their means.

warnings.filterwarnings("ignore")  # suppress all warnings

DATASET = Contraceptive  # dataset configuration object (file name plus its hyper-parameters)
datasetname = DATASET.DATASETNAME.split('.')[0]  # file name up to the first '.'

# Load the dataset.
mat_data = sio.loadmat(IMBALANCED_DATASET_PATH + DATASET.DATASETNAME)
x = mat_data['X']
y = mat_data['Y'][:, 0]  # mat_data['Y'] has shape [n, 1]; [:, 0] turns it into shape [n,]

num_run = 30  # number of repetitions

gmean_results = []
mauc_results = []

model = MLPClassifier(hidden_layer_sizes=(DATASET.HIDDEN_SIZE,), max_iter=DATASET.MAX_ITER,
                      random_state=RANDOM_SEED, learning_rate_init=DATASET.LEARNING_RATE)

# Dedicated, seeded generator for the per-run seeds. The runs still receive
# different seeds from one another, but the whole experiment is repeatable from
# RANDOM_SEED instead of depending on the state of NumPy's global generator.
rng = np.random.RandomState(RANDOM_SEED)

for i in range(num_run):
    random_state = RANDOM_SEED + rng.randint(1, 1000)
    # NOTE(review): the split seed is the constant RANDOM_SEED + 1, so every run
    # uses the same stratified 70/30 partition; only the seed handed to
    # sample_dataset varies between runs. Confirm this is intended.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, stratify=y,
                                                        random_state=RANDOM_SEED + 1)
    # clone(model) gives each run a fresh, unfitted copy of the estimator.
    gmean_rus, mauc_rus = sample_dataset(clone(model), x_train, x_test, y_train, y_test,
                                         random_state + rng.randint(1, 10000), method='RUS')
    # One single-element row per run, matching the single 'RUS' column below.
    gmean_results.append([gmean_rus])
    mauc_results.append([mauc_rus])
    # Report this run's results.
    print(f'第{i + 1}次运行:')
    print(f'Gmean: {gmean_rus}')
    print(f'mAUC: {mauc_rus}')

columns = ['RUS']

save_path = 'C:/Users/zsc/Desktop/Sampling/RUS/' + datasetname + '/'
filename = datasetname
save_to_excel(save_path, filename, columns, gmean_results, mauc_results)

# Print the mean of each metric over all runs.
print(np.mean(gmean_results, axis=0))
print(np.mean(mauc_results, axis=0))

第1次运行:
Gmean: 0.548006
mAUC: 0.707337
第2次运行:
Gmean: 0.535345
mAUC: 0.712296
第3次运行:
Gmean: 0.52457
mAUC: 0.716517
第4次运行:
Gmean: 0.481847
mAUC: 0.681359
第5次运行:
Gmean: 0.504443
mAUC: 0.703997
第6次运行:
Gmean: 0.526737
mAUC: 0.692637
第7次运行:
Gmean: 0.492784
mAUC: 0.694681
第8次运行:
Gmean: 0.494605
mAUC: 0.69906
第9次运行:
Gmean: 0.506759
mAUC: 0.705165
第10次运行:
Gmean: 0.513562
mAUC: 0.702896
第11次运行:
Gmean: 0.550066
mAUC: 0.709726
第12次运行:
Gmean: 0.513897
mAUC: 0.696367
第13次运行:
Gmean: 0.533686
mAUC: 0.700712
第14次运行:
Gmean: 0.514007
mAUC: 0.6834
第15次运行:
Gmean: 0.513827
mAUC: 0.681167
第16次运行:
Gmean: 0.503928
mAUC: 0.706918
第17次运行:
Gmean: 0.562288
mAUC: 0.725557
第18次运行:
Gmean: 0.538017
mAUC: 0.71498
第19次运行:
Gmean: 0.531458
mAUC: 0.719213
第20次运行:
Gmean: 0.509385
mAUC: 0.672647
第21次运行:
Gmean: 0.527143
mAUC: 0.722068
第22次运行:
Gmean: 0.489824
mAUC: 0.665355
第23次运行:
Gmean: 0.511766
mAUC: 0.69494
第24次运行:
Gmean: 0.505447
mAUC: 0.690114
第25次运行:
Gmean: 0.540143
mAUC: 0.719475
第26次运行:
Gmean: 0.525719
mAUC: 0.714666
第