In [None]:
import copy
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
warnings.filterwarnings('ignore')

In [None]:
# Typography defaults applied to every figure created after this cell.
# Applied in one call; keys are listed in the order: font family, minus-sign
# handling, then the individual text sizes.
plt.rcParams.update({
    'font.sans-serif': ['Arial', 'DejaVu Sans'],
    'axes.unicode_minus': False,
    'font.size': 16,
    'axes.labelsize': 20,
    'axes.titlesize': 24,
    'xtick.labelsize': 16,
    'ytick.labelsize': 16,
    'legend.fontsize': 14,
    'figure.titlesize': 26,
})

print("Font settings configured for enhanced readability")

## Data Loading

In [None]:
# Location of the source workbook and the names given to its seven columns.
data_file = r"F:\作图目录20280825\骨质疏松数据.xlsx"
cols = ['signal_1', 'sost_1', 'signal_2', 'sost_2', 'sost_mean', 'l1_4', 'left_hip']

# Read spreadsheet columns B..H, taking the second row (header=1) as the
# header row, then replace the sheet's own headers with the names in `cols`.
df = pd.read_excel(data_file, header=1, usecols='B:H')
df.columns = cols

print(f"Data dimensions: {df.shape}")
print(f"Features: {list(df.columns)}")

In [None]:
# Add class labels
# Labels are assigned purely by row position: the first 35 rows are tagged
# 'Healthy', the next 33 'Osteopenia', and the remainder 'Osteoporosis'.
# NOTE(review): this assumes the sheet is sorted by diagnosis in exactly that
# order — confirm against the workbook.
class_counts = {
    'Healthy': 35,
    'Osteopenia': 33,
    # A 103-row sheet carries 35 osteoporosis rows; otherwise 36 are expected.
    'Osteoporosis': 35 if len(df) == 103 else 36,
}

# Fail with an explicit message instead of pandas' generic length-mismatch
# error when the sheet has an unexpected number of rows.
expected_rows = sum(class_counts.values())
if len(df) != expected_rows:
    raise ValueError(
        f"Cannot assign class labels by position: expected 103 or 104 rows, got {len(df)}"
    )

df['class'] = [name for name, count in class_counts.items() for _ in range(count)]

print("Class distribution:")
print(df['class'].value_counts())

# Single source of truth for the label encoding (used for `y` and the printout).
label_map = {'Healthy': 0, 'Osteopenia': 1, 'Osteoporosis': 2}
y = df['class'].map(label_map)
print(f"\nLabel mapping: {label_map}")

## Algorithm Configuration

In [None]:
# One row per classifier:
#   (display name, estimator, hyper-parameter grid, standardize inputs?, plot color)
# An empty grid means the estimator is fitted as configured, without a search.
_ALGO_TABLE = [
    ('SVM',
     SVC(probability=True, random_state=42),
     {'C': [0.1, 1, 10], 'kernel': ['rbf', 'linear']},
     True, '#24AAE3'),
    ('Random Forest',
     RandomForestClassifier(random_state=42),
     {'n_estimators': [50, 100], 'max_depth': [None, 5, 10]},
     False, '#E9C581'),
    ('Logistic Regression',
     LogisticRegression(random_state=42, max_iter=1000),
     {'C': [0.1, 1, 10]},
     True, '#CABBDB'),
    ('K-Nearest Neighbors',
     KNeighborsClassifier(),
     {'n_neighbors': [3, 5, 7]},
     True, '#95B9B9'),
    ('Decision Tree',
     DecisionTreeClassifier(random_state=42),
     {'max_depth': [None, 5, 10]},
     False, '#FDC1C1'),
    ('Linear Discriminant Analysis',
     LinearDiscriminantAnalysis(),
     {},
     False, '#7D9A18'),
]

# Expand the table into the name -> settings mapping used by the rest of the notebook.
algo_configs = {
    name: {
        'model': model,
        'params': params,
        'needs_scaling': needs_scaling,
        'color': color,
    }
    for name, model, params, needs_scaling, color in _ALGO_TABLE
}

## Core Functions

In [None]:
def train_algorithm(algo_name, config, X_train, X_test, y_train, y_test):
    """Fit one configured classifier and return its class probabilities on the test split.

    Parameters
    ----------
    algo_name : str
        Display name used in the progress messages.
    config : dict
        Settings with the keys ``'model'`` (the estimator), ``'params'`` (grid
        handed to ``GridSearchCV``; an empty/falsy value skips the search) and
        ``'needs_scaling'`` (whether features are standardized first).
    X_train, X_test : array-like
        Feature matrices used for fitting and for prediction respectively.
    y_train : array-like
        Training labels.
    y_test : array-like
        Not used by this function; kept so existing call sites keep working.

    Returns
    -------
    The value returned by ``predict_proba`` on the (optionally standardized)
    test features, or ``None`` when probability prediction fails.
    """
    print(f"Training: {algo_name}...")

    # Data standardization: statistics come from the training split only and
    # are then applied to the test split.
    if config['needs_scaling']:
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
    else:
        X_train_scaled, X_test_scaled = X_train, X_test

    # Parameter optimization
    if config['params']:
        # NOTE(review): the scaler above was fitted on the whole training split,
        # so the 3 CV folds share scaling statistics. Wrapping scaler + model in
        # a Pipeline would avoid that; left unchanged to keep results comparable.
        grid_search = GridSearchCV(config['model'], config['params'], cv=3, scoring='accuracy')
        grid_search.fit(X_train_scaled, y_train)
        best_model = grid_search.best_estimator_
        print(f"  ✓ Best parameters: {grid_search.best_params_}")
    else:
        # Fit a copy so the estimator stored in the shared config is not
        # mutated between feature sets.
        best_model = copy.deepcopy(config['model'])
        best_model.fit(X_train_scaled, y_train)
        print("  ✓ Using default parameters")

    # Prediction probabilities: best-effort, but only ordinary errors are
    # absorbed (KeyboardInterrupt / SystemExit still propagate) and the reason
    # is reported instead of being silently dropped.
    try:
        return best_model.predict_proba(X_test_scaled)
    except Exception as exc:
        print(f"  ⚠ {algo_name} cannot generate probability predictions "
              f"({type(exc).__name__}: {exc})")
        return None

In [None]:
def _macro_average_roc(y_bin, y_proba):
    """Compute the macro-averaged one-vs-rest ROC curve.

    Column ``i`` of ``y_bin`` is scored against column ``i`` of ``y_proba``.
    The per-class curves are interpolated onto the union of their FPR points
    and averaged with equal weight per class.

    Returns
    -------
    tuple
        ``(fpr, mean_tpr, auc_value)`` for the averaged curve.
    """
    n_classes = y_bin.shape[1]
    per_class = [roc_curve(y_bin[:, i], y_proba[:, i])[:2] for i in range(n_classes)]

    # Common FPR grid: every threshold point that appears in any class curve.
    all_fpr = np.unique(np.concatenate([fpr for fpr, _ in per_class]))
    mean_tpr = np.zeros_like(all_fpr)
    for fpr, tpr in per_class:
        mean_tpr += np.interp(all_fpr, fpr, tpr)
    mean_tpr /= n_classes

    return all_fpr, mean_tpr, auc(all_fpr, mean_tpr)


def plot_roc_curves_enhanced(y_true, y_proba_dict, title, feature_name, save_path=None):
    """Plot one macro-averaged ROC curve per algorithm on a single figure.

    Parameters
    ----------
    y_true : array-like
        True labels, binarized against the classes ``[0, 1, 2]``.
    y_proba_dict : dict
        Maps algorithm name to its probability array (indexed as
        ``y_proba[:, class]``). Entries whose value is ``None`` are skipped.
    title : str
        Figure title.
    feature_name : str
        Not used by this function; kept for call-site compatibility.
    save_path : str, optional
        Path prefix. When given, the figure is written to
        ``<save_path>_high_res.tiff`` (600 dpi) and ``<save_path>_vector.pdf``.

    Returns
    -------
    list of dict
        One ``{'algorithm', 'auc', 'color'}`` record per plotted algorithm, in
        the iteration order of ``y_proba_dict``.
    """
    y_bin = label_binarize(y_true, classes=[0, 1, 2])

    fig, ax = plt.subplots(figsize=(14, 11))

    results = []

    for algo_name, y_proba in y_proba_dict.items():
        if y_proba is None:
            continue

        macro_fpr, macro_tpr, macro_auc = _macro_average_roc(y_bin, y_proba)

        # Color comes from the module-level algorithm table; unknown names
        # fall back to a default blue.
        color = algo_configs.get(algo_name, {}).get('color', '#0066CC')

        # The SVM curve is drawn slightly thicker than the others.
        line_width = 4 if algo_name == 'SVM' else 3
        ax.plot(macro_fpr, macro_tpr,
                label=f'{algo_name} (AUC = {macro_auc:.3f})',
                color=color, linestyle='-', linewidth=line_width, alpha=0.9)

        results.append({
            'algorithm': algo_name,
            'auc': macro_auc,
            'color': color
        })

    # Diagonal reference line
    ax.plot([0, 1], [0, 1], 'k--', linewidth=2.5, alpha=0.8, label='Random Classifier')

    ax.set_xlim([-0.02, 1.02])
    ax.set_ylim([-0.02, 1.02])

    ax.set_xlabel('False Positive Rate', fontsize=22, fontweight='bold', labelpad=15)
    ax.set_ylabel('True Positive Rate', fontsize=22, fontweight='bold', labelpad=15)
    ax.set_title(title, fontsize=26, fontweight='bold', pad=30)

    ticks = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    ax.tick_params(axis='both', which='major', labelsize=18, length=8, width=2)

    legend = ax.legend(loc="lower right", fontsize=16, frameon=True,
                       fancybox=True, shadow=True, framealpha=0.95,
                       edgecolor='black', facecolor='white',
                       borderpad=1.2, labelspacing=1.5)
    legend.get_frame().set_linewidth(2)

    ax.set_facecolor('white')
    fig.patch.set_facecolor('white')

    for spine in ax.spines.values():
        spine.set_linewidth(3)
        spine.set_color('black')

    fig.tight_layout()

    if save_path:
        # Save through the figure object so the output never depends on which
        # figure pyplot currently considers "active".
        fig.savefig(f"{save_path}_high_res.tiff", dpi=600, bbox_inches='tight',
                    facecolor='white', format='tiff')
        fig.savefig(f"{save_path}_vector.pdf", format='pdf', bbox_inches='tight',
                    facecolor='white')
        print(f"✓ Saved: {save_path}_high_res.tiff and {save_path}_vector.pdf")

    plt.show()
    return results

## SOST1 + SOST2 Six-Algorithm ROC Curves

In [None]:
print("=== Generating SOST1 + SOST2 6-algorithm ROC curves ===")

# Features: the two SOST columns. Stratified 75/25 split with a fixed seed.
X_sost = df[['sost_1', 'sost_2']].values
X_sost_train, X_sost_test, y_sost_train, y_sost_test = train_test_split(
    X_sost, y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

# Train every configured algorithm; keep only those that produced probabilities.
y_proba_sost_dict = {}
for algo_name, config in algo_configs.items():
    y_proba = train_algorithm(
        algo_name=algo_name,
        config=config,
        X_train=X_sost_train,
        X_test=X_sost_test,
        y_train=y_sost_train,
        y_test=y_sost_test,
    )
    if y_proba is None:
        continue
    y_proba_sost_dict[algo_name] = y_proba

# Output folder for all ROC figures (created on first use).
output_dir = r"F:\作图目录20280825\ROC_curves_enhanced"
os.makedirs(output_dir, exist_ok=True)
save_path_sost = os.path.join(output_dir, "ROC_SOST1_SOST2_6algorithms")

# Draw and save the combined ROC figure.
results_sost = plot_roc_curves_enhanced(
    y_true=y_sost_test,
    y_proba_dict=y_proba_sost_dict,
    title='SOST1 + SOST2 Features',
    feature_name='SOST1+SOST2',
    save_path=save_path_sost,
)

print("\n=== SOST1+SOST2 6-algorithms completed ===")

## Signal1 + Signal2 Six-Algorithm ROC Curves

In [None]:
print("=== Generating Signal1 + Signal2 6-algorithm ROC curves ===")

# Features: the two signal columns. Stratified 75/25 split with a fixed seed.
X_signal = df[['signal_1', 'signal_2']].values
X_signal_train, X_signal_test, y_signal_train, y_signal_test = train_test_split(
    X_signal, y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

# Train every configured algorithm; keep only those that produced probabilities.
y_proba_signal_dict = {}
for algo_name, config in algo_configs.items():
    y_proba = train_algorithm(
        algo_name=algo_name,
        config=config,
        X_train=X_signal_train,
        X_test=X_signal_test,
        y_train=y_signal_train,
        y_test=y_signal_test,
    )
    if y_proba is None:
        continue
    y_proba_signal_dict[algo_name] = y_proba

# Draw and save the combined ROC figure (reuses `output_dir` set by the
# SOST cell above, so that cell must have run first).
save_path_signal = os.path.join(output_dir, "ROC_Signal1_Signal2_6algorithms")
results_signal = plot_roc_curves_enhanced(
    y_true=y_signal_test,
    y_proba_dict=y_proba_signal_dict,
    title='Signal1 + Signal2 Features',
    feature_name='Signal1+Signal2',
    save_path=save_path_signal,
)

print("\n=== Signal1+Signal2 6-algorithms completed ===")

## Results Summary

In [None]:
# Create results summary table
summary_data = []

# (label shown in the table, name of the variable holding that run's results).
# A run whose results variable does not exist is skipped.
for feature_label, results_name in (('SOST1+SOST2', 'results_sost'),
                                    ('Signal1+Signal2', 'results_signal')):
    if results_name not in locals():
        continue
    for result in locals()[results_name]:
        summary_data.append({
            'Feature Combination': feature_label,
            'Algorithm': result['algorithm'],
            'AUC Score': f"{result['auc']:.3f}",
            'Rank': ''
        })

if not summary_data:
    print("⚠ No results data available")
else:
    report_columns = ['Feature Combination', 'Algorithm', 'AUC Score', 'Rank']
    summary_df = pd.DataFrame(summary_data)

    # Ranking works on the numeric value of the 3-decimal AUC string, so
    # scores that display identically share a rank.
    summary_df['AUC_Numeric'] = summary_df['AUC Score'].astype(float)
    summary_df = summary_df.sort_values(['Feature Combination', 'AUC_Numeric'], ascending=[True, False])

    # Rank within each feature combination; 1 = highest AUC, ties get the lowest rank.
    auc_by_feature = summary_df.groupby('Feature Combination')['AUC_Numeric']
    summary_df['Rank'] = auc_by_feature.rank(ascending=False, method='min').astype(int)

    report = summary_df[report_columns]

    # Display results
    print("=== Algorithm Performance Summary ===")
    print(report.to_string(index=False))

    # Save results
    summary_file = os.path.join(output_dir, "Algorithm_Performance_Summary.xlsx")
    report.to_excel(summary_file, index=False)
    print(f"\n✓ Results saved to: {summary_file}")