In [4]:
import numpy as np
import scipy.stats
from sklearn.metrics import roc_auc_score

def calc_auc_variance(aucs, n1, n2):
    """Accumulate a symmetric 2x2 matrix from a collection of AUC-like values.

    For every value ``a`` in ``aucs`` two quantities are formed,
    ``q1 = a / (2 - a)`` and ``q2 = 2 * a**2 / (1 + a)`` (the Q1/Q2
    expressions of the Hanley-McNeil standard-error formula).  The result is::

        [[sum(q1), sum(q2)     ],
         [sum(q2), sum(q1 * q2)]] / (k * n1 * n2)

    where ``k`` is the number of values.

    NOTE(review): this matrix is not the DeLong covariance estimator; confirm
    that this is the statistic the callers actually want.

    Parameters
    ----------
    aucs : array-like
        Values to accumulate.  Any shape is accepted and flattened, so both a
        plain sequence and an ``(n, 1)`` column array work.
    n1, n2 : int
        Scaling counts; the accumulated sums are divided by ``k * n1 * n2``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(2, 2)``.  If ``k * n1 * n2`` is zero the division
        yields non-finite entries rather than raising.
    """
    # Flatten up front: indexing an (n, 1) column row-by-row yields shape-(1,)
    # arrays, and writing those into a single matrix cell depends on NumPy's
    # deprecated conversion of size-1 arrays to scalars.
    values = np.asarray(aucs, dtype=float).ravel()
    k = values.size

    term1 = values / (2.0 - values)
    term2 = (2.0 * values * values) / (1.0 + values)

    V = np.empty((2, 2))
    V[0, 0] = term1.sum()
    V[0, 1] = V[1, 0] = term2.sum()
    V[1, 1] = (term1 * term2).sum()
    V /= (k * n1 * n2)
    return V

def delong_roc_variance(ground_truth, predictions):
    """Compute per-sample placement values and feed them to ``calc_auc_variance``.

    Samples whose label equals 1 are positives; all others are negatives.
    For each positive the placement value is the fraction of negatives it
    outranks, and for each negative it is the fraction of positives that
    outrank it; ties count one half.  The values are kept in the original
    sample order and passed, with the two class counts, to
    ``calc_auc_variance``.

    NOTE(review): the returned matrix is whatever ``calc_auc_variance``
    produces from these values; confirm it is the intended variance estimate.

    Parameters
    ----------
    ground_truth : array-like
        Class labels; entries equal to 1 are treated as positive.
    predictions : array-like
        Scores, one per label; higher means "more positive".

    Returns
    -------
    numpy.ndarray
        The 2x2 matrix returned by ``calc_auc_variance``.

    Raises
    ------
    ValueError
        If the inputs differ in length or one of the classes is absent.
    """
    labels = np.asarray(ground_truth).ravel()
    scores = np.asarray(predictions, dtype=float).ravel()
    if labels.shape != scores.shape:
        raise ValueError("ground_truth and predictions must have the same length")

    pos = labels == 1
    n1 = int(np.sum(pos))
    n2 = int(np.sum(~pos))
    if n1 == 0 or n2 == 0:
        raise ValueError(
            "ground_truth must contain both positive (== 1) and negative samples"
        )

    # Midranks make the counts tie-aware: for a positive x,
    #   rank_all(x) - rank_within_positives(x) = #{neg < x} + 0.5 * #{neg == x}
    # and symmetrically for a negative against the positives.
    rank_all = scipy.stats.rankdata(scores)
    aucs = np.empty(n1 + n2)
    aucs[pos] = (rank_all[pos] - scipy.stats.rankdata(scores[pos])) / n2
    aucs[~pos] = 1.0 - (rank_all[~pos] - scipy.stats.rankdata(scores[~pos])) / n1

    # A 1-D array is passed so calc_auc_variance indexes scalars, not
    # shape-(1,) rows.
    V = calc_auc_variance(aucs, n1, n2)
    return V

def _delong_placement_values(positive_mask, scores):
    """Return (per-positive, per-negative) placement values for one score vector."""
    n_pos = int(positive_mask.sum())
    n_neg = positive_mask.size - n_pos
    # Midrank differences count "strictly below" plus half of the ties.
    rank_all = scipy.stats.rankdata(scores)
    v10 = (rank_all[positive_mask] - scipy.stats.rankdata(scores[positive_mask])) / n_neg
    v01 = 1.0 - (rank_all[~positive_mask] - scipy.stats.rankdata(scores[~positive_mask])) / n_pos
    return v10, v01


def delong_roc_test(ground_truth, predictions_one, predictions_two):
    """Two-sided DeLong test for the difference between two correlated AUCs.

    Both score vectors are evaluated on the same samples.  Placement values
    are computed for each model; their sample covariances across positives
    and across negatives give the covariance of the two AUC estimates, and
    the AUC difference is compared against a standard normal.

    The AUCs are the means of the per-positive placement values, i.e. the
    Mann-Whitney statistic with ties counted as one half.

    Parameters
    ----------
    ground_truth : array-like
        Class labels; entries equal to 1 are treated as positive.
    predictions_one, predictions_two : array-like
        Scores of the two models, aligned with ``ground_truth``.

    Returns
    -------
    tuple of float
        ``(auc_one, auc_two, p)`` where ``p`` is the two-sided p-value.

    Raises
    ------
    ValueError
        If the inputs differ in length, or there are fewer than two positive
        or two negative samples (the covariances cannot be estimated).
    """
    labels = np.asarray(ground_truth).ravel()
    score_sets = [
        np.asarray(scores, dtype=float).ravel()
        for scores in (predictions_one, predictions_two)
    ]
    if any(scores.shape != labels.shape for scores in score_sets):
        raise ValueError("ground_truth and both prediction vectors must have the same length")

    pos = labels == 1
    n_pos = int(pos.sum())
    n_neg = labels.size - n_pos
    if n_pos < 2 or n_neg < 2:
        raise ValueError("need at least two positive (== 1) and two negative samples")

    placements = [_delong_placement_values(pos, scores) for scores in score_sets]
    v10 = np.vstack([p[0] for p in placements])  # one row per model, one column per positive
    v01 = np.vstack([p[1] for p in placements])  # one row per model, one column per negative

    auc_one, auc_two = (float(a) for a in v10.mean(axis=1))

    # Covariance of the two AUC estimates, then variance of their difference
    # via the contrast (1, -1).  The previous code inverted the covariance of
    # the raw scores, which is not the variance of the AUC difference.
    cov = np.cov(v10) / n_pos + np.cov(v01) / n_neg
    contrast = np.array([1.0, -1.0])
    var_diff = float(contrast @ cov @ contrast)

    diff = auc_one - auc_two
    if var_diff <= 0.0:
        # Degenerate case (e.g. identical predictions): no sampling variability.
        p = 1.0 if diff == 0 else 0.0
    else:
        z = diff / np.sqrt(var_diff)
        p = float(scipy.stats.norm.sf(abs(z)) * 2)
    return auc_one, auc_two, p

In [6]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve, confusion_matrix
from scipy.stats import norm


# Function that computes the AUC and its confidence interval
def compute_auc_with_ci(y_true, y_scores, alpha=0.95):
    """Return the ROC AUC with a normal-approximation confidence interval.

    The standard error follows the Hanley-McNeil formula::

        SE^2 = (A(1-A) + (n1-1)(Q1 - A^2) + (n2-1)(Q2 - A^2)) / (n1 * n2)

    with ``Q1 = A / (2 - A)`` and ``Q2 = 2 A^2 / (1 + A)``.

    Parameters
    ----------
    y_true : sequence
        Labels.  ``n1`` is taken as ``sum(y_true)``, so the count of positives
        is only correct for 0/1 labels.
    y_scores : sequence
        Scores passed to ``roc_auc_score``.
    alpha : float, default 0.95
        Despite the name this is the confidence *level* (0.95 gives a 95%
        interval), not the significance level.

    Returns
    -------
    tuple
        ``(auc, lower, upper)``; the bounds are clipped to ``[0, 1]`` because
        an AUC cannot lie outside that range.
    """
    auc_value = roc_auc_score(y_true, y_scores)
    n1 = sum(y_true)
    n2 = len(y_true) - n1

    auc_squared = auc_value ** 2
    q1 = auc_value / (2 - auc_value)
    q2 = 2 * auc_squared / (1 + auc_value)
    variance = (
        auc_value * (1 - auc_value)
        + (n1 - 1) * (q1 - auc_squared)
        + (n2 - 1) * (q2 - auc_squared)
    ) / (n1 * n2)
    auc_se = np.sqrt(variance)

    # Two-sided normal quantile, computed once for both bounds.
    half_width = norm.ppf(1 - (1 - alpha) / 2) * auc_se
    # np.clip (rather than min/max) keeps a NaN bound visible as NaN.
    lower = np.clip(auc_value - half_width, 0.0, 1.0)
    upper = np.clip(auc_value + half_width, 0.0, 1.0)
    return auc_value, lower, upper

# Function that computes sensitivity and specificity with their confidence intervals
def _wald_proportion_ci(successes, total, z):
    """Return (proportion, lower, upper) of a Wald interval clipped to [0, 1]."""
    proportion = successes / total
    half_width = z * np.sqrt((proportion * (1 - proportion)) / total)
    # np.clip keeps NaN (from an empty class) as NaN instead of hiding it.
    lower = np.clip(proportion - half_width, 0.0, 1.0)
    upper = np.clip(proportion + half_width, 0.0, 1.0)
    return proportion, lower, upper


def compute_sensitivity_specificity_with_ci(y_true, y_pred, alpha=0.95):
    """Return sensitivity and specificity with Wald confidence intervals.

    Parameters
    ----------
    y_true, y_pred : sequence
        True and predicted labels; class 1 is positive and class 0 negative.
    alpha : float, default 0.95
        Despite the name this is the confidence *level* (0.95 gives a 95%
        interval).

    Returns
    -------
    tuple
        ``(sensitivity, lower_sensitivity, upper_sensitivity,
        specificity, lower_specificity, upper_specificity)``.  Interval bounds
        are clipped to ``[0, 1]``.  If a class has no true samples, the
        corresponding rate and bounds are NaN.
    """
    # Pin the label order so the matrix is always 2x2, even when only one
    # class occurs in the data; otherwise cm[1, 1] would raise IndexError.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    z = norm.ppf(1 - (1 - alpha) / 2)
    sensitivity, lower_sensitivity, upper_sensitivity = _wald_proportion_ci(tp, tp + fn, z)
    specificity, lower_specificity, upper_specificity = _wald_proportion_ci(tn, tn + fp, z)
    return sensitivity, lower_sensitivity, upper_sensitivity, specificity, lower_specificity, upper_specificity

# Prediction CSVs to evaluate, keyed by the label shown in the results table.
# NOTE(review): the "_in" / "_ex" suffixes presumably mean internal vs. external
# test sets - confirm against the data documentation.
model_files = dict(
    all_in='whisper_large_v2_32_v41_test_all.csv',
    all_ex='whisper_large_v2_32_rwe_test_v5_160_all.csv',
    close_in='whisper_large_v2_32_v41_test_close.csv',
    close_ex='whisper_large_v2_32_rwe_test_v5_160_close.csv',
    open_in='whisper_large_v2_32_v41_test_open.csv',
    open_ex='whisper_large_v2_32_rwe_test_v5_160_open.csv',
    negative_in='whisper_large_v2_32_v41_test_negative.csv',
    negative_ex='whisper_large_v2_32_rwe_test_v5_160_negative.csv',
    neutral_in='whisper_large_v2_32_v41_test_neutral.csv',
    neutral_ex='whisper_large_v2_32_rwe_test_v5_160_neutral.csv',
    positive_in='whisper_large_v2_32_v41_test_positive.csv',
    positive_ex='whisper_large_v2_32_rwe_test_v5_160_positive.csv',
    hubert_in='hubert_chinese_19_v41_test_usd.csv',
    hubert_ex='hubert_chinese_19_rwe_v5_160_new.csv',
    wavlm_in='wavlm_12_v41_test_usd.csv',
    wavlm_ex='wavlm_12_rwe_v5_160_new.csv',
)

results = []

# Compute the metrics for every model / prediction file.
for key, file in model_files.items():
    df = pd.read_csv(file)
    y_true, y_scores = df['target_label'], df['depressed_score']

    auc_value, auc_lower, auc_upper = compute_auc_with_ci(y_true, y_scores)

    # ROC curve points; not used further in this cell.
    fpr, tpr, thresholds = roc_curve(y_true, y_scores)

    # Sensitivity and specificity at a fixed 0.5 score cut-off.
    y_pred = (y_scores >= 0.5).astype(int)
    (
        sensitivity, lower_sensitivity, upper_sensitivity,
        specificity, lower_specificity, upper_specificity,
    ) = compute_sensitivity_specificity_with_ci(y_true, y_pred)

    # Column name -> raw value, in the order the table should show them.
    metrics = {
        'AUC': auc_value,
        'AUC CI lower': auc_lower,
        'AUC CI upper': auc_upper,
        'Sensitivity': sensitivity,
        'Sensitivity CI lower': lower_sensitivity,
        'Sensitivity CI upper': upper_sensitivity,
        'Specificity': specificity,
        'Specificity CI lower': lower_specificity,
        'Specificity CI upper': upper_specificity,
    }
    # Every metric is stored as a string rounded to three decimals.
    results.append({
        'model': key,
        **{column: f"{value:.3f}" for column, value in metrics.items()},
    })


results_df = pd.DataFrame(results)
print(results_df)



          model    AUC AUC CI lower AUC CI upper Sensitivity  \
0        all_in  0.932        0.903        0.961       0.889   
1        all_ex  0.879        0.825        0.934       0.825   
2      close_in  0.911        0.878        0.944       0.833   
3      close_ex  0.848        0.787        0.908       0.700   
4       open_in  0.913        0.881        0.946       0.821   
5       open_ex  0.831        0.767        0.894       0.775   
6   negative_in  0.910        0.877        0.943       0.870   
7   negative_ex  0.828        0.763        0.892       0.775   
8    neutral_in  0.898        0.862        0.933       0.840   
9    neutral_ex  0.837        0.774        0.900       0.725   
10  positive_in  0.925        0.895        0.955       0.852   
11  positive_ex  0.840        0.777        0.902       0.700   
12    hubert_in  0.916        0.884        0.947       0.901   
13    hubert_ex  0.819        0.753        0.884       0.762   
14     wavlm_in  0.906        0.873     