## Wilcoxon符号秩检验

In [2]:
import numpy as np
import pandas as pd
from scipy import stats
import itertools

# Synthetic example scores (replace with your real measurements).
# The global NumPy RNG is seeded first; columns are then drawn in the order
# listed below, which fixes which part of the random stream each one gets.
np.random.seed(42)
column_means = {
    'NOS': 0.85,
    'RUS': 0.87,
    'ROS': 0.88,
    'SMOTE': 0.86,
    'BorderlineSMOTE-1': 0.84,
    'KMeansSMOTE': 0.85,
    'MILE': 0.86,
}
data = {
    label: np.random.normal(loc=center, scale=0.05, size=30)
    for label, center in column_means.items()
}
df = pd.DataFrame(data)

# Every unordered pair of algorithm columns.
algorithms = list(df.columns)
pairs = list(itertools.combinations(algorithms, 2))

# Run the Wilcoxon test on each pair and keep one record per comparison.
results = []
for first, second in pairs:
    outcome = stats.wilcoxon(df[first], df[second])
    record = {
        'Algorithm 1': first,
        'Algorithm 2': second,
        'Statistic': outcome.statistic,
        'p-value': outcome.pvalue,
    }
    results.append(record)

# Collect the records into a table and show it.
results_df = pd.DataFrame(results)
print(results_df)

          Algorithm 1        Algorithm 2  Statistic   p-value
0                 NOS                RUS      137.0  0.049710
1                 NOS                ROS       92.0  0.002987
2                 NOS              SMOTE      162.0  0.151887
3                 NOS  BorderlineSMOTE-1      227.0  0.919297
4                 NOS        KMeansSMOTE      127.0  0.029326
5                 NOS               MILE      146.0  0.076721
6                 RUS                ROS      185.0  0.338742
7                 RUS              SMOTE      213.0  0.700033
8                 RUS  BorderlineSMOTE-1      117.0  0.016431
9                 RUS        KMeansSMOTE      227.0  0.919297
10                RUS               MILE      225.0  0.887195
11                ROS              SMOTE      159.0  0.134735
12                ROS  BorderlineSMOTE-1       78.0  0.000952
13                ROS        KMeansSMOTE      173.0  0.228553
14                ROS               MILE      137.0  0.049710
15      

In [2]:
from statsmodels.stats.multitest import multipletests

# Raw (uncorrected) p-values from the pairwise comparisons.
p_values = results_df['p-value'].to_numpy()

# Benjamini-Hochberg correction (controls the false discovery rate).
# No alpha is passed, so the library default applies; presumably 0.05 to
# match the column label below -- confirm against the multipletests docs.
bh_outcome = multipletests(p_values, method='fdr_bh')
rejected, corrected_p = bh_outcome[0], bh_outcome[1]

# Attach the corrected p-values and the reject decisions to the table.
for column_name, column_values in (
    ('Corrected p-value', corrected_p),
    ('Significant (α=0.05)', rejected),
):
    results_df[column_name] = column_values

# Show the comparisons ordered from smallest to largest raw p-value.
ranked_results = results_df.sort_values(by='p-value')
print(ranked_results)

  Algorithm 1 Algorithm 2  Wilcoxon Statistic   p-value  Corrected p-value  \
8           C           E                78.0  0.000952           0.009518   
1           A           C                92.0  0.002987           0.014935   
6           B           E               117.0  0.016431           0.054771   
0           A           B               137.0  0.049710           0.124275   
9           D           E               142.0  0.063556           0.127113   
7           C           D               159.0  0.134735           0.216981   
2           A           D               162.0  0.151887           0.216981   
4           B           C               185.0  0.338742           0.423427   
5           B           D               213.0  0.700033           0.777814   
3           A           E               227.0  0.919297           0.919297   

   Significant (α=0.05)  
8                  True  
1                  True  
6                 False  
0                 False  
9          

In [3]:
# Draw another set of example scores for the same seven columns.
# No seed is set in this cell, so the values continue from whatever state
# the global NumPy RNG is in when the cell runs.
data = {
    label: np.random.normal(loc=center, scale=0.05, size=30)
    for label, center in (
        ('NOS', 0.85),
        ('RUS', 0.87),
        ('ROS', 0.88),
        ('SMOTE', 0.86),
        ('BorderlineSMOTE-1', 0.84),
        ('KMeansSMOTE', 0.85),
        ('MILE', 0.86),
    )
}
print(data)

{'NOS': array([0.87854453, 0.90677828, 0.89770009, 0.88256956, 0.83423654,
       0.88794846, 0.81135874, 0.83815907, 0.82573182, 0.85409371,
       0.96573293, 0.75663674, 0.88431301, 0.76936421, 0.82640341,
       0.90444753, 0.853214  , 0.79611276, 0.81423481, 0.88397989,
       0.81348167, 0.86082293, 0.85227859, 0.81741998, 0.9571972 ,
       0.88169595, 0.74874287, 0.85932272, 0.81691068, 0.89262167]), 'RUS': array([0.83037396, 0.86426318, 0.89524936, 0.91328776, 0.80998518,
       0.85327494, 0.84625273, 0.83733354, 0.95827271, 0.89024909,
       0.8069558 , 0.9158931 , 0.97610781, 0.92162326, 0.7940315 ,
       0.8457883 , 0.93334556, 0.83461653, 0.89219097, 0.9087317 ,
       0.82365348, 0.86702373, 0.70793663, 0.81878062, 0.85737159,
       0.80761084, 0.95162057, 0.79849293, 0.84799778, 0.87653703]), 'ROS': array([0.95206366, 0.80820689, 0.93815819, 0.88051165, 0.83092457,
       0.90310517, 0.88995298, 0.84998916, 0.8834901 , 0.86073432,
       0.88567587, 0.91310653, 0.959