In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import os
import ast
from glob import glob
from sklearn import metrics
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

The performance of three models is analyzed under their respective headers in this notebook (i.e., the CheXpert-trained, MIMIC-trained, and NIH-trained models).

# CheXpert

In [None]:
import pandas as pd
import ast
from sklearn import metrics

def evaluate_age_csv(csv_file_path, label_name):
    """Print and return the weighted AUROC / AUPRC for one prediction CSV.

    The CSV is expected to contain two columns:
      * 'Patient Age Category' - the true class, stored as a number or a
        numeric string (e.g. ``2`` or ``"2.0"``).
      * 'Age_Probability' - a string holding a Python-literal list of per-class
        scores (presumably one probability per age class; the original
        notebook comment mentions 5 classes - confirm against the exporter).

    Rows without a label are dropped (a note is printed) rather than letting a
    single NaN discard the whole file.

    This function is defined once per model section of the notebook; keep the
    copies in sync.

    Args:
        csv_file_path: Path of the prediction CSV to score.
        label_name: Name printed in front of the metrics.

    Returns:
        ``(wAUROC, wAUPRC)`` on success, or ``(None, None)`` when the file is
        missing, lacks a required column, or cannot be parsed / scored (the
        error is printed instead of raised so a loop over files keeps going).
    """
    try:
        predict_df = pd.read_csv(csv_file_path)

        # Drop unlabeled rows first: int(float(nan)) raises ValueError, which
        # would otherwise throw away every row of the file.
        labelled_df = predict_df[predict_df['Patient Age Category'].notna()]
        n_dropped = len(predict_df) - len(labelled_df)
        if n_dropped:
            print(f'{label_name} - dropped {n_dropped} rows with a missing label')

        # Normalize true labels (handles values such as 2, 2.0 and "2.0")
        y_true = labelled_df['Patient Age Category'].apply(lambda x: int(float(x)))

        # Per-class probabilities, parsed from their string representation
        y_score = labelled_df['Age_Probability'].apply(ast.literal_eval).tolist()

        # Compute weighted one-vs-rest AUROC
        wAUROC = metrics.roc_auc_score(
            y_true,
            y_score,
            multi_class='ovr',
            average='weighted'
        )

        # Compute weighted AUPRC
        wAUPRC = metrics.average_precision_score(
            y_true,
            y_score,
            average='weighted'
        )

        print(f'{label_name} - wAUROC: {wAUROC:.4f}, wAUPRC: {wAUPRC:.4f}')
        return wAUROC, wAUPRC

    # SyntaxError: ast.literal_eval raises it for a malformed probability string.
    except (FileNotFoundError, KeyError, ValueError, SyntaxError) as e:
        print(f"Error processing {csv_file_path}: {e}")
        return None, None

# Prediction CSVs scored in this cell, keyed by the label printed with each result
paths_by_label = {
    'BRAX': '/content/chexpert_age_brax_test_full.csv',
    'CheXpert': '/content/chexpert_age_chexpert_test.csv',
    'JSRT': '/content/chexpert_age_jsrt_test.csv',
    'MIMIC': '/content/chexpert_age_mimic_test.csv',
    'NIH': '/content/chexpert_age_nih_test.csv',
    'PadChest': '/content/chexpert_age_padchest_test_full.csv',
    'VinDR': '/content/chexpert_age_vindr_test.csv',
    'Shenzhen': '/content/chexpert_age_shenzhen_test.csv',
}
# (path, label) pairs, in the insertion order of the mapping above
csv_files_with_labels = [(csv_path, name) for name, csv_path in paths_by_label.items()]

# Print the point estimates for every file in turn
for file_path, label in csv_files_with_labels:
    evaluate_age_csv(file_path, label)


BRAX - wAUROC: 0.9158, wAUPRC: 0.7418
CheXpert - wAUROC: 0.8803, wAUPRC: 0.7205
JSRT - wAUROC: 0.8478, wAUPRC: 0.7054
MIMIC - wAUROC: 0.8312, wAUPRC: 0.6271
NIH - wAUROC: 0.8612, wAUPRC: 0.6979


  predict_df = pd.read_csv(csv_file_path)


PadChest - wAUROC: 0.4900, wAUPRC: 0.2927
VinDR - wAUROC: 0.8609, wAUPRC: 0.6779
Shenzhen - wAUROC: 0.7925, wAUPRC: 0.6772


In [None]:
import pandas as pd
import numpy as np
import ast
from sklearn import metrics

def bootstrap_wauc_age(csv_file_path, label_name, n_iterations=1000, random_state=2025):
    """Bootstrap 95% confidence intervals for the weighted AUROC and AUPRC.

    The CSV is expected to contain a 'Patient Age Category' column (true class,
    numeric or numeric string) and an 'Age_Probability' column (string holding
    a Python-literal list of per-class scores). Rows are resampled with
    replacement ``n_iterations`` times; each resample is scored and the 2.5th /
    97.5th percentiles of the scores form the interval.

    Args:
        csv_file_path: Path of the prediction CSV.
        label_name: Name printed in front of the intervals.
        n_iterations: Number of bootstrap resamples to draw.
        random_state: Seed for ``np.random.default_rng`` so runs are repeatable.

    Returns:
        ``(auroc_ci, auprc_ci)`` - two length-2 arrays ``[lower, upper]`` - or
        ``(None, None)`` when the file is missing, lacks a required column,
        cannot be parsed, or no resample could be scored (the error is printed
        instead of raised so a loop over files keeps going).
    """
    try:
        # Load the predictions
        df = pd.read_csv(csv_file_path)

        # Drop unlabeled rows *before* the integer cast: int(float(nan)) raises,
        # so filtering afterwards could never take effect.
        df = df[df['Patient Age Category'].notna()]

        # Convert to NumPy for efficient fancy indexing inside the loop
        y_true_array = df['Patient Age Category'].apply(lambda x: int(float(x))).to_numpy()
        y_score_array = np.array(df['Age_Probability'].apply(ast.literal_eval).tolist())

        n_rows = len(y_true_array)
        if n_rows == 0:
            raise ValueError('no labelled rows to resample')

        # Bootstrap loop
        rng = np.random.default_rng(seed=random_state)
        auroc_scores = []
        auprc_scores = []

        for _ in range(n_iterations):
            # Sample row positions with replacement
            indices = rng.choice(n_rows, size=n_rows, replace=True)
            y_true_sample = y_true_array[indices]
            y_score_sample = y_score_array[indices]

            try:
                wAUROC = metrics.roc_auc_score(
                    y_true_sample,
                    y_score_sample,
                    multi_class='ovr',
                    average='weighted'
                )
                wAUPRC = metrics.average_precision_score(
                    y_true_sample,
                    y_score_sample,
                    average='weighted'
                )
            except ValueError:
                # Metric undefined for this resample (e.g. a class is absent)
                continue

            auroc_scores.append(wAUROC)
            auprc_scores.append(wAUPRC)

        # Percentiles of an empty list are meaningless; report it as an error.
        if not auroc_scores:
            raise ValueError(
                f'none of the {n_iterations} bootstrap resamples could be scored'
            )

        # Make it visible when the interval rests on fewer resamples than requested
        n_skipped = n_iterations - len(auroc_scores)
        if n_skipped:
            print(f"{label_name} - skipped {n_skipped} of {n_iterations} resamples "
                  f"for which the metrics were undefined")

        # Compute confidence intervals
        auroc_ci = np.percentile(auroc_scores, [2.5, 97.5])
        auprc_ci = np.percentile(auprc_scores, [2.5, 97.5])

        print(f"{label_name} - AUROC 95% CI: [{auroc_ci[0]:.4f}, {auroc_ci[1]:.4f}], "
              f"AUPRC 95% CI: [{auprc_ci[0]:.4f}, {auprc_ci[1]:.4f}]")

        return auroc_ci, auprc_ci

    # SyntaxError: ast.literal_eval raises it for a malformed probability string.
    except (FileNotFoundError, KeyError, ValueError, SyntaxError) as e:
        print(f"Error processing {csv_file_path}: {e}")
        return None, None
# Prediction CSVs bootstrapped in this cell, keyed by the label printed with each result
paths_by_label = {
    'BRAX': '/content/chexpert_age_brax_test_full.csv',
    'CheXpert': '/content/chexpert_age_chexpert_test.csv',
    'JSRT': '/content/chexpert_age_jsrt_test.csv',
    'MIMIC': '/content/chexpert_age_mimic_test.csv',
    'NIH': '/content/chexpert_age_nih_test.csv',
    'PadChest': '/content/chexpert_age_padchest_test_full.csv',
    'VinDR': '/content/chexpert_age_vindr_test.csv',
    'Shenzhen': '/content/chexpert_age_shenzhen_test.csv',
}
# (path, label) pairs, in the insertion order of the mapping above
csv_files_with_labels = [(csv_path, name) for name, csv_path in paths_by_label.items()]

# Print the bootstrap confidence intervals for every file in turn
for file_path, label in csv_files_with_labels:
    bootstrap_wauc_age(file_path, label)


BRAX - AUROC 95% CI: [0.9134, 0.9185], AUPRC 95% CI: [0.7355, 0.7494]
CheXpert - AUROC 95% CI: [0.8773, 0.8832], AUPRC 95% CI: [0.7140, 0.7268]
JSRT - AUROC 95% CI: [0.8098, 0.8832], AUPRC 95% CI: [0.6490, 0.7800]
MIMIC - AUROC 95% CI: [0.8286, 0.8339], AUPRC 95% CI: [0.6223, 0.6329]
NIH - AUROC 95% CI: [0.8576, 0.8647], AUPRC 95% CI: [0.6915, 0.7055]


  df = pd.read_csv(csv_file_path)


PadChest - AUROC 95% CI: [0.4877, 0.4923], AUPRC 95% CI: [0.2906, 0.2950]
VinDR - AUROC 95% CI: [0.8531, 0.8688], AUPRC 95% CI: [0.6628, 0.6945]
Shenzhen - AUROC 95% CI: [0.7636, 0.8194], AUPRC 95% CI: [0.6473, 0.7206]


# MIMIC

In [None]:
import pandas as pd
import ast
from sklearn import metrics

def evaluate_age_csv(csv_file_path, label_name):
    """Print and return the weighted AUROC / AUPRC for one prediction CSV.

    The CSV is expected to contain two columns:
      * 'Patient Age Category' - the true class, stored as a number or a
        numeric string (e.g. ``2`` or ``"2.0"``).
      * 'Age_Probability' - a string holding a Python-literal list of per-class
        scores (presumably one probability per age class; the original
        notebook comment mentions 5 classes - confirm against the exporter).

    Rows without a label are dropped (a note is printed) rather than letting a
    single NaN discard the whole file.

    This function is defined once per model section of the notebook; keep the
    copies in sync.

    Args:
        csv_file_path: Path of the prediction CSV to score.
        label_name: Name printed in front of the metrics.

    Returns:
        ``(wAUROC, wAUPRC)`` on success, or ``(None, None)`` when the file is
        missing, lacks a required column, or cannot be parsed / scored (the
        error is printed instead of raised so a loop over files keeps going).
    """
    try:
        predict_df = pd.read_csv(csv_file_path)

        # Drop unlabeled rows first: int(float(nan)) raises ValueError, which
        # would otherwise throw away every row of the file.
        labelled_df = predict_df[predict_df['Patient Age Category'].notna()]
        n_dropped = len(predict_df) - len(labelled_df)
        if n_dropped:
            print(f'{label_name} - dropped {n_dropped} rows with a missing label')

        # Normalize true labels (handles values such as 2, 2.0 and "2.0")
        y_true = labelled_df['Patient Age Category'].apply(lambda x: int(float(x)))

        # Per-class probabilities, parsed from their string representation
        y_score = labelled_df['Age_Probability'].apply(ast.literal_eval).tolist()

        # Compute weighted one-vs-rest AUROC
        wAUROC = metrics.roc_auc_score(
            y_true,
            y_score,
            multi_class='ovr',
            average='weighted'
        )

        # Compute weighted AUPRC
        wAUPRC = metrics.average_precision_score(
            y_true,
            y_score,
            average='weighted'
        )

        print(f'{label_name} - wAUROC: {wAUROC:.4f}, wAUPRC: {wAUPRC:.4f}')
        return wAUROC, wAUPRC

    # SyntaxError: ast.literal_eval raises it for a malformed probability string.
    except (FileNotFoundError, KeyError, ValueError, SyntaxError) as e:
        print(f"Error processing {csv_file_path}: {e}")
        return None, None

# Prediction CSVs scored in this cell, keyed by the label printed with each result
paths_by_label = {
    'BRAX': '/content/mimic_age_brax_test_full.csv',
    'CheXpert': '/content/mimic_age_chexpert_test.csv',
    'JSRT': '/content/mimic_age_jsrt_test.csv',
    'MIMIC': '/content/mimic_age_mimic_test.csv',
    'NIH': '/content/mimic_age_nih_test.csv',
    'PadChest': '/content/mimic_age_padchest_test_full.csv',
    'VinDR': '/content/mimic_age_vindr_test.csv',
    'Shenzhen': '/content/mimic_age_shenzhen_test.csv',
}
# (path, label) pairs, in the insertion order of the mapping above
csv_files_with_labels = [(csv_path, name) for name, csv_path in paths_by_label.items()]

# Print the point estimates for every file in turn
for file_path, label in csv_files_with_labels:
    evaluate_age_csv(file_path, label)


BRAX - wAUROC: 0.9083, wAUPRC: 0.7280
CheXpert - wAUROC: 0.8359, wAUPRC: 0.6462
JSRT - wAUROC: 0.7905, wAUPRC: 0.6577
MIMIC - wAUROC: 0.8699, wAUPRC: 0.7005
NIH - wAUROC: 0.8356, wAUPRC: 0.6422


  predict_df = pd.read_csv(csv_file_path)


PadChest - wAUROC: 0.5260, wAUPRC: 0.3091
VinDR - wAUROC: 0.8604, wAUPRC: 0.6535
Shenzhen - wAUROC: 0.6833, wAUPRC: 0.6034


In [None]:
# Prediction CSVs bootstrapped in this cell, keyed by the label printed with each result
paths_by_label = {
    'BRAX': '/content/mimic_age_brax_test_full.csv',
    'CheXpert': '/content/mimic_age_chexpert_test.csv',
    'JSRT': '/content/mimic_age_jsrt_test.csv',
    'MIMIC': '/content/mimic_age_mimic_test.csv',
    'NIH': '/content/mimic_age_nih_test.csv',
    'PadChest': '/content/mimic_age_padchest_test_full.csv',
    'VinDR': '/content/mimic_age_vindr_test.csv',
    'Shenzhen': '/content/mimic_age_shenzhen_test.csv',
}
# (path, label) pairs, in the insertion order of the mapping above
csv_files_with_labels = [(csv_path, name) for name, csv_path in paths_by_label.items()]

# Print the bootstrap confidence intervals for every file in turn
for file_path, label in csv_files_with_labels:
    bootstrap_wauc_age(file_path, label)

BRAX - AUROC 95% CI: [0.9057, 0.9110], AUPRC 95% CI: [0.7214, 0.7354]
CheXpert - AUROC 95% CI: [0.8323, 0.8391], AUPRC 95% CI: [0.6400, 0.6526]
JSRT - AUROC 95% CI: [0.7503, 0.8322], AUPRC 95% CI: [0.6112, 0.7237]
MIMIC - AUROC 95% CI: [0.8677, 0.8724], AUPRC 95% CI: [0.6959, 0.7059]
NIH - AUROC 95% CI: [0.8317, 0.8397], AUPRC 95% CI: [0.6351, 0.6502]


  df = pd.read_csv(csv_file_path)


PadChest - AUROC 95% CI: [0.5238, 0.5282], AUPRC 95% CI: [0.3068, 0.3115]
VinDR - AUROC 95% CI: [0.8532, 0.8674], AUPRC 95% CI: [0.6384, 0.6693]
Shenzhen - AUROC 95% CI: [0.6575, 0.7119], AUPRC 95% CI: [0.5678, 0.6487]


# NIH

In [None]:
import pandas as pd
import ast
from sklearn import metrics

def evaluate_age_csv(csv_file_path, label_name):
    """Print and return the weighted AUROC / AUPRC for one prediction CSV.

    The CSV is expected to contain two columns:
      * 'Patient Age Category' - the true class, stored as a number or a
        numeric string (e.g. ``2`` or ``"2.0"``).
      * 'Age_Probability' - a string holding a Python-literal list of per-class
        scores (presumably one probability per age class; the original
        notebook comment mentions 5 classes - confirm against the exporter).

    Rows without a label are dropped (a note is printed) rather than letting a
    single NaN discard the whole file.

    This function is defined once per model section of the notebook; keep the
    copies in sync.

    Args:
        csv_file_path: Path of the prediction CSV to score.
        label_name: Name printed in front of the metrics.

    Returns:
        ``(wAUROC, wAUPRC)`` on success, or ``(None, None)`` when the file is
        missing, lacks a required column, or cannot be parsed / scored (the
        error is printed instead of raised so a loop over files keeps going).
    """
    try:
        predict_df = pd.read_csv(csv_file_path)

        # Drop unlabeled rows first: int(float(nan)) raises ValueError, which
        # would otherwise throw away every row of the file.
        labelled_df = predict_df[predict_df['Patient Age Category'].notna()]
        n_dropped = len(predict_df) - len(labelled_df)
        if n_dropped:
            print(f'{label_name} - dropped {n_dropped} rows with a missing label')

        # Normalize true labels (handles values such as 2, 2.0 and "2.0")
        y_true = labelled_df['Patient Age Category'].apply(lambda x: int(float(x)))

        # Per-class probabilities, parsed from their string representation
        y_score = labelled_df['Age_Probability'].apply(ast.literal_eval).tolist()

        # Compute weighted one-vs-rest AUROC
        wAUROC = metrics.roc_auc_score(
            y_true,
            y_score,
            multi_class='ovr',
            average='weighted'
        )

        # Compute weighted AUPRC
        wAUPRC = metrics.average_precision_score(
            y_true,
            y_score,
            average='weighted'
        )

        print(f'{label_name} - wAUROC: {wAUROC:.4f}, wAUPRC: {wAUPRC:.4f}')
        return wAUROC, wAUPRC

    # SyntaxError: ast.literal_eval raises it for a malformed probability string.
    except (FileNotFoundError, KeyError, ValueError, SyntaxError) as e:
        print(f"Error processing {csv_file_path}: {e}")
        return None, None

# Prediction CSVs scored in this cell, keyed by the label printed with each result
paths_by_label = {
    'BRAX': '/content/nih_age_brax_test_full.csv',
    'CheXpert': '/content/nih_age_chexpert_test.csv',
    'JSRT': '/content/nih_age_jsrt_test.csv',
    'MIMIC': '/content/nih_age_mimic_test.csv',
    'NIH': '/content/nih_age_nih_test.csv',
    'PadChest': '/content/nih_age_padchest_test_full.csv',
    'VinDR': '/content/nih_age_vindr_test.csv',
    'Shenzhen': '/content/nih_age_shenzhen_test.csv',
}
# (path, label) pairs, in the insertion order of the mapping above
csv_files_with_labels = [(csv_path, name) for name, csv_path in paths_by_label.items()]

# Print the point estimates for every file in turn
for file_path, label in csv_files_with_labels:
    evaluate_age_csv(file_path, label)


BRAX - wAUROC: 0.9131, wAUPRC: 0.7304
CheXpert - wAUROC: 0.8147, wAUPRC: 0.6031
JSRT - wAUROC: 0.7315, wAUPRC: 0.6542
MIMIC - wAUROC: 0.8130, wAUPRC: 0.6041
NIH - wAUROC: 0.8921, wAUPRC: 0.7769


  predict_df = pd.read_csv(csv_file_path)


PadChest - wAUROC: 0.5306, wAUPRC: 0.3155
VinDR - wAUROC: 0.8794, wAUPRC: 0.7220
Shenzhen - wAUROC: 0.8259, wAUPRC: 0.7279


In [None]:
# Prediction CSVs bootstrapped in this cell, keyed by the label printed with each result
paths_by_label = {
    'BRAX': '/content/nih_age_brax_test_full.csv',
    'CheXpert': '/content/nih_age_chexpert_test.csv',
    'JSRT': '/content/nih_age_jsrt_test.csv',
    'MIMIC': '/content/nih_age_mimic_test.csv',
    'NIH': '/content/nih_age_nih_test.csv',
    'PadChest': '/content/nih_age_padchest_test_full.csv',
    'VinDR': '/content/nih_age_vindr_test.csv',
    'Shenzhen': '/content/nih_age_shenzhen_test.csv',
}
# (path, label) pairs, in the insertion order of the mapping above
csv_files_with_labels = [(csv_path, name) for name, csv_path in paths_by_label.items()]

# Print the bootstrap confidence intervals for every file in turn
for file_path, label in csv_files_with_labels:
    bootstrap_wauc_age(file_path, label)

BRAX - AUROC 95% CI: [0.9105, 0.9159], AUPRC 95% CI: [0.7235, 0.7380]
CheXpert - AUROC 95% CI: [0.8111, 0.8182], AUPRC 95% CI: [0.5966, 0.6099]
JSRT - AUROC 95% CI: [0.6882, 0.7749], AUPRC 95% CI: [0.6085, 0.7101]
MIMIC - AUROC 95% CI: [0.8102, 0.8160], AUPRC 95% CI: [0.5994, 0.6098]
NIH - AUROC 95% CI: [0.8887, 0.8955], AUPRC 95% CI: [0.7708, 0.7836]


  df = pd.read_csv(csv_file_path)


PadChest - AUROC 95% CI: [0.5285, 0.5327], AUPRC 95% CI: [0.3133, 0.3177]
VinDR - AUROC 95% CI: [0.8718, 0.8870], AUPRC 95% CI: [0.7076, 0.7372]
Shenzhen - AUROC 95% CI: [0.7978, 0.8497], AUPRC 95% CI: [0.6953, 0.7631]
