## 加载数据

In [1]:
import pandas as pd
from sklearn.base import clone
from config import Datasets, Datasets_3
from feature_selection import FeatureSelection, non_dominated_sort, train_and_test
from skfeature.function.similarity_based import fisher_score
from skfeature.function.information_theoretical_based import CIFE
from skfeature.function.statistical_based import chi_square
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import SMOTE
import warnings

warnings.filterwarnings("ignore")  # suppress all warnings so the per-dataset log stays readable

# Experiment driver: for every dataset in Datasets_3, train the same MLP on
#   (1) the raw training split,
#   (2) the SMOTE-resampled training split,
#   (3) the SMOTE-resampled training split restricted to the selected features,
# print the three results and write one summary row per dataset to a CSV file.
fs = FeatureSelection(Datasets_3)

# Column headers of the result table (runtime strings that end up in the CSV).
result_columns = ['数据集', '类分布', '特征数量', '特征选择数量', '原始', 'SMOTE', '特征选择+SMOTE']
# Rows are collected here and turned into a DataFrame once, after the loop,
# instead of enlarging the DataFrame one row at a time.
result_rows = []

for i, dataset in enumerate(Datasets_3):
    # Preprocess the current dataset; afterwards the loop reads
    # fs.x_train / fs.x_test / fs.y_train / fs.y_test / fs.dataset / fs.distribution.
    fs.pre_process(dataset, random_state=42)
    print(f"{i + 1}th dataset: {fs.dataset.DATASETNAME}")
    fs.display_distribution()

    # Feature selection with three different scoring algorithms; each call is
    # asked for index output (mode='index').
    idx_1 = fs.feature_selection(fisher_score.fisher_score, mode='index')
    idx_2 = fs.feature_selection(chi_square.chi_square, mode='index')
    # CIFE is asked to select as many features as the training split has columns.
    idx_3 = fs.feature_selection(CIFE.cife, mode='index', n_selected_features=fs.x_train.shape[1])

    # Non-dominated sort over the three index results; the first front is used
    # below as the column indexer of the feature matrices.
    # NOTE(review): presumably fronts are ordered best-first -- confirm in non_dominated_sort.
    all_fronts = non_dominated_sort(idx_1, idx_2, idx_3)
    selected = all_fronts[0]

    # Un-fitted template model; every run below trains its own clone so the
    # three results come from identical hyper-parameters and seed.
    model = MLPClassifier(hidden_layer_sizes=(fs.dataset.HIDDEN_SIZE,), max_iter=fs.dataset.MAX_ITER,
                          random_state=42, learning_rate_init=fs.dataset.LEARNING_RATE)

    # (1) Baseline: raw training data.
    res_1 = train_and_test(clone(model), fs.x_train, fs.x_test, fs.y_train, fs.y_test)
    print(f"原始：{res_1}")

    # (2) SMOTE applied to the training split only; the test split is left untouched.
    x_train, y_train = SMOTE(random_state=42, k_neighbors=fs.dataset.K_NEIGHBORS).fit_resample(
        fs.x_train, fs.y_train)
    res_2 = train_and_test(clone(model), x_train, fs.x_test, y_train, fs.y_test)
    print(f"SMOTE：{res_2}")

    # (3) Restrict to the selected columns first, then apply SMOTE to the
    # reduced training split; the test split gets the same column restriction.
    x_train, y_train = SMOTE(random_state=42, k_neighbors=fs.dataset.K_NEIGHBORS).fit_resample(
        fs.x_train[:, selected], fs.y_train)
    res_3 = train_and_test(clone(model), x_train, fs.x_test[:, selected], y_train, fs.y_test)
    print(f"特征选择+SMOTE：{res_3}")

    # Record this dataset's summary row (same field order as result_columns).
    result_rows.append([fs.dataset.DATASETNAME, fs.distribution, fs.x_train.shape[1],
                        len(selected), res_1, res_2, res_3])

# Build the result table once and persist it without the index column.
df = pd.DataFrame(result_rows, columns=result_columns)
df.to_csv('feature_selection_result.csv', index=False)

1th dataset: GLI-85.mat
trainset distribution: [18 41]
testset distribution: [ 8 18]
number of feature: 22283
原始：(0.816497, 0.854167, 0.819444)
SMOTE：(0.816497, 0.791667, 0.819444)
特征选择+SMOTE：(0.677003, 0.756944, 0.640553)
