## 加载数据、特征选择与模型评估

In [None]:
import pandas as pd
from sklearn.base import clone
from sklearn.svm import SVC
from config import Datasets, Datasets_test
from feature_selection import FeatureSelection, non_dominated_sort, train_and_test
from skfeature.function.similarity_based import fisher_score
from skfeature.function.information_theoretical_based import CIFE
from skfeature.function.statistical_based import chi_square
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import SMOTE
import warnings

# Silence every warning so the per-dataset log below stays readable.
# NOTE(review): this is a blanket filter and will also hide convergence or
# deprecation warnings from the libraries used here.
warnings.filterwarnings("ignore")

# Column layout of the result table / CSV. These are runtime strings that end
# up in the output file, so they are kept verbatim.
RESULT_COLUMNS = ['数据集', '类分布', '特征数量', '特征选择数量', '原始', 'SMOTE', '特征选择+SMOTE']
RESULT_PATH = 'feature_selection_result.csv'

# Experiment driver built from the list of datasets to evaluate.
fs = FeatureSelection(Datasets_test)

# The classifier settings do not depend on the dataset, so the unfitted
# template is created once and cloned for every training run below.
# Alternative that was tried previously (per-dataset MLP settings):
# model = MLPClassifier(hidden_layer_sizes=(fs.dataset.HIDDEN_SIZE,), max_iter=fs.dataset.MAX_ITER,
#                       random_state=42, learning_rate_init=fs.dataset.LEARNING_RATE)
model = SVC(kernel='rbf', C=1.0, gamma='scale', probability=True, random_state=42)

# One row per finished dataset; the DataFrame is built from this list rather
# than being enlarged row by row.
records = []
for i in range(len(Datasets_test)):
    # Pre-process the i-th dataset (fixed seed for reproducibility).
    fs.pre_process(Datasets_test[i], random_state=42)
    print(f"{i + 1}th dataset: {fs.dataset.DATASETNAME}")
    fs.display_distribution()

    # Feature selection with three different scoring algorithms; each call is
    # asked for a feature index ranking (mode='index').
    idx_1 = fs.feature_selection(fisher_score.fisher_score, mode='index')
    idx_2 = fs.feature_selection(chi_square.chi_square, mode='index')
    idx_3 = fs.feature_selection(CIFE.cife, mode='index', n_selected_features=fs.x_train.shape[1])

    # Non-dominated sort over the three rankings; the first front is used as
    # the set of selected feature indices below.
    all_fronts = non_dominated_sort(idx_1, idx_2, idx_3)

    # Baseline: train on the original (un-resampled) training data.
    res_1 = train_and_test(clone(model), fs.x_train, fs.x_test, fs.y_train, fs.y_test)
    print(f"原始：{res_1}")

    # SMOTE oversampling on all features.
    x_train, y_train = SMOTE(random_state=42, k_neighbors=fs.dataset.K_NEIGHBORS).fit_resample(fs.x_train, fs.y_train)
    res_2 = train_and_test(clone(model), x_train, fs.x_test, y_train, fs.y_test)
    print(f"SMOTE：{res_2}")

    # SMOTE oversampling restricted to the features of the first front; the
    # test set is reduced to the same columns.
    x_train, y_train = SMOTE(random_state=42, k_neighbors=fs.dataset.K_NEIGHBORS).fit_resample(
        fs.x_train[:, all_fronts[0]], fs.y_train)
    res_3 = train_and_test(clone(model), x_train, fs.x_test[:, all_fronts[0]], y_train, fs.y_test)
    print(f"特征选择+SMOTE：{res_3}")

    # Record this dataset's results.
    records.append([fs.dataset.DATASETNAME, fs.distribution, fs.x_train.shape[1],
                    len(all_fronts[0]), res_1, res_2, res_3])

    # Checkpoint after every dataset: a single dataset can take very long, and
    # an interruption or failure later in the loop must not discard the
    # results that were already computed.
    pd.DataFrame(records, columns=RESULT_COLUMNS).to_csv(RESULT_PATH, encoding='utf_8_sig', index=False)

# Final result table (also written when the dataset list is empty, in which
# case the CSV contains only the header row).
df = pd.DataFrame(records, columns=RESULT_COLUMNS)
df.to_csv(RESULT_PATH, encoding='utf_8_sig', index=False)

1th dataset: GLIOMA.mat
trainset distribution: [10  5 10 10]
testset distribution: [4 2 4 5]
number of feature: 4434
原始：(0.66874, 0.947917, 0.715152)
SMOTE：(0.66874, 0.947917, 0.715152)
特征选择+SMOTE：(0.66874, 0.94375, 0.709524)
2th dataset: Lung.mat
trainset distribution: [97 12 15 14  4]
testset distribution: [42  5  6  6  2]
number of feature: 3312
原始：(0.917674, 0.996746, 0.934759)
SMOTE：(0.721295, 0.996508, 0.81303)
特征选择+SMOTE：(0.756409, 0.985198, 0.807772)
3th dataset: Ovarian.mat
trainset distribution: [113  64]
testset distribution: [49 27]
number of feature: 15154


In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

# Load the iris dataset and split it into features and class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples as the test set (seeded for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features: the scaler is fitted on the training split only
# and then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# RBF-kernel SVM; probability=True enables predict_proba on the fitted model.
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', probability=True, random_state=42).fit(X_train, y_train)

# Hard class predictions and per-class probabilities for the test set.
y_pred = svm_model.predict(X_test)
probabilities = svm_model.predict_proba(X_test)

# Print the full probability matrix.
print("\n预测概率值:", probabilities, sep="\n")

# Print the per-class precision / recall / F1 report.
print("\n分类报告:", classification_report(y_test, y_pred, target_names=iris.target_names), sep="\n")

# Detailed view of the first five test samples: true label, predicted label
# and the probability assigned to every class.
print("\n前5个样本的详细预测信息:")
first_five = zip(y_test[:5], y_pred[:5], probabilities[:5])
for sample_no, (true_label, pred_label, class_probs) in enumerate(first_five, start=1):
    print(f"样本 {sample_no}:")
    print(f"真实类别: {iris.target_names[true_label]}")
    print(f"预测类别: {iris.target_names[pred_label]}")
    print("类别概率:")
    for name, prob in zip(iris.target_names, class_probs):
        print(f"  {name}: {prob:.4f}")
    print()


预测概率值:
[[0.01468339 0.95280662 0.03251   ]
 [0.96441134 0.02064276 0.01494589]
 [0.05070953 0.04512495 0.90416552]
 [0.0129761  0.88190436 0.10511954]
 [0.02285246 0.80976336 0.16738418]
 [0.96744739 0.01992946 0.01262315]
 [0.02936508 0.95872657 0.01190835]
 [0.0147084  0.02562382 0.95966778]
 [0.02873884 0.7483095  0.22295166]
 [0.01384762 0.97671742 0.00943496]
 [0.01544183 0.09286162 0.89169655]
 [0.97244221 0.01628972 0.01126807]
 [0.97067156 0.01581101 0.01351742]
 [0.97345878 0.01501209 0.01152913]
 [0.97183534 0.0153252  0.01283946]
 [0.02745404 0.80070372 0.17184224]
 [0.01182279 0.00580136 0.98237585]
 [0.01208675 0.97618523 0.01172802]
 [0.01295109 0.94756237 0.03948653]
 [0.01266621 0.00556073 0.98177306]
 [0.97284446 0.01499024 0.0121653 ]
 [0.01613952 0.26415236 0.71970812]
 [0.97341836 0.01536271 0.01121893]
 [0.01180803 0.00746868 0.98072329]
 [0.04068905 0.06070084 0.89861011]
 [0.01402062 0.0137642  0.97221518]
 [0.01959708 0.0425948  0.93780812]
 [0.01171237 0.00882