In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import os
import ast
from glob import glob
from sklearn import metrics
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

The performance of three models is analyzed under their respective headers in this notebook (i.e., the CheXpert-trained, MIMIC-trained, and NIH-trained models).

# MIMIC

In [None]:
import pandas as pd
import numpy as np
import ast
from sklearn import metrics

def categorize_age(age):
    """Map an age in years to an ordinal age-bracket code.

    Brackets: (0, 20] -> 0, (20, 40] -> 1, (40, 60] -> 2, (60, 80] -> 3,
    above 80 -> 4.

    Args:
        age: Anything ``float()`` accepts (a number or a numeric string).

    Returns:
        The bracket code as an int, or ``None`` when ``age`` cannot be
        converted to a float, is NaN, or is not strictly positive.
    """
    try:
        age = float(age)
    except (TypeError, ValueError, OverflowError):
        # Narrowed from a bare ``except`` so that unrelated exceptions
        # (e.g. KeyboardInterrupt) are no longer silently swallowed.
        return None

    # ``not age > 0`` also rejects NaN, for which every comparison is False.
    # NOTE(review): an age of exactly 0 is treated as invalid -- confirm that
    # no dataset encodes infants under one year as 0.
    if not age > 0:
        return None

    # Contiguous upper bounds: fractional ages such as 20.5 or 80.5 used to
    # fall between the integer-only brackets and return None. Integer ages
    # map exactly as before.
    if age <= 20:
        return 0
    if age <= 40:
        return 1
    if age <= 60:
        return 2
    if age <= 80:
        return 3
    return 4

def bootstrap_auc_for_csv(csv_file_path, label_name, n_bootstrap=1000, seed=2025):
    """Print the sex-prediction AUROC with a 95% bootstrap CI for one prediction CSV.

    Rows are restricted to age bracket 0 of ``categorize_age``. For the
    'VinDr-PCXR' label every row (de-duplicated on 'Path') is kept instead.
    Files with 50 or fewer remaining rows are skipped with a notice.

    Args:
        csv_file_path: CSV holding a 'Gender_Probability' column of list
            literals, a sex column and (except for VinDr-PCXR) an age column.
        label_name: Display name used in the printed line; the exact value
            'VinDr-PCXR' switches on the VinDr handling.
        n_bootstrap: Number of bootstrap resamples.
        seed: Seed for the resampling RNG.

    Returns:
        None. The result, skip notice or error message is printed.
        FileNotFoundError, KeyError and ValueError are caught and reported.
    """
    try:
        # low_memory=False parses each column in one pass, which avoids the
        # mixed-dtype DtypeWarning raised on the larger files.
        predict_df = pd.read_csv(csv_file_path, low_memory=False)
        predict_df['Gender_Probability'] = predict_df['Gender_Probability'].apply(ast.literal_eval)

        # Handle Age Category
        if label_name == 'VinDr-PCXR':
            predict_df['Patient Age Category'] = 0
            predict_df = predict_df.drop_duplicates(subset=['Path'], keep='first')
        else:
            for col in ['Age', 'PatientAge', 'age', 'patient_age', 'Patient Age']:
                if col in predict_df.columns:
                    predict_df[col] = pd.to_numeric(predict_df[col], errors='coerce')
                    predict_df['Patient Age Category'] = predict_df[col].apply(categorize_age)
                    break
            else:
                # Previously this surfaced later as an opaque
                # KeyError: 'Patient Age Category'.
                raise KeyError("No recognized age column found.")

        # Filter pediatric cases
        predict_df = predict_df[predict_df['Patient Age Category'] == 0]
        if len(predict_df) <= 50:
            print(f"Skipping {label_name}: only {len(predict_df)} pediatric cases.")
            return

        # Ground Truth Gender
        for col in ['Gender', 'sex', 'Sex', 'PatientSex', 'gender']:
            if col in predict_df.columns:
                y_true = predict_df[col]
                break
        else:
            raise KeyError("No recognized gender column found.")

        y_true = y_true.replace({'Male': 'M', 'Female': 'F'})
        y_true = y_true.map({'F': 1, 'M': 0})  # Female = 1, Male = 0
        mask = y_true.notna()  # rows with any other sex value are dropped

        y_true = y_true[mask].astype(int).values
        # NOTE(review): element 0 of each probability list is used as the
        # score for the positive (female) class -- confirm the model's
        # output order.
        y_score = predict_df.loc[mask, 'Gender_Probability'].apply(lambda x: x[0]).values

        # Point estimate first: it raises ValueError (reported below) when
        # only one class is present, before any resampling is done.
        auroc = metrics.roc_auc_score(y_true, y_score)

        # Bootstrap AUROC
        rng = np.random.RandomState(seed)
        bootstrapped_scores = []
        n_samples = len(y_true)
        for _ in range(n_bootstrap):
            indices = rng.randint(0, n_samples, n_samples)
            if len(np.unique(y_true[indices])) < 2:
                continue  # skip if not both classes present
            bootstrapped_scores.append(metrics.roc_auc_score(y_true[indices], y_score[indices]))

        if bootstrapped_scores:
            lower, upper = np.percentile(bootstrapped_scores, [2.5, 97.5])
        else:
            # No usable resample (e.g. n_bootstrap=0): report an undefined CI
            # instead of taking percentiles of an empty list.
            lower = upper = float('nan')

        print(f"{label_name} - AUROC: {auroc:.4f} [{lower:.4f}, {upper:.4f}]")

    except (FileNotFoundError, KeyError, ValueError) as e:
        print(f"Error processing {csv_file_path}: {e}")
# Prediction CSVs evaluated under the MIMIC header, written as a
# path -> display-label mapping and flattened to (path, label) pairs in
# reporting order.
csv_files_with_labels = list({
    '/content/mimic_gender_chexpert_test.csv': 'CheXpert Peds',
    '/content/mimic_gender_mimic_test.csv': 'MIMIC Peds',
    '/content/mimic_gender_nih_test.csv': 'NIH Peds',
    '/content/mimic_gender_brax_test_full.csv': 'BRAX Peds',
    '/content/mimic_gender_jsrt_test.csv': 'JSRT Peds',
    '/content/mimic_gender_padchest_test_full.csv': 'PadChest Peds',
    '/content/mimic_gender_shenzhen_test.csv': 'Shenzhen Peds',
    '/content/mimic_gender_vindr_peds_test.csv': 'VinDr-PCXR',
}.items())

# One printed line (result, skip notice or error) per file.
for file_path, label in csv_files_with_labels:
    bootstrap_auc_for_csv(file_path, label)


CheXpert Peds - AUROC: 0.9685 [0.9559, 0.9788]
MIMIC Peds - AUROC: 0.9987 [0.9962, 1.0000]
NIH Peds - AUROC: 0.8601 [0.8422, 0.8779]
BRAX Peds - AUROC: 0.8291 [0.8112, 0.8469]
Skipping JSRT Peds: only 2 pediatric cases.


  predict_df = pd.read_csv(csv_file_path)


PadChest Peds - AUROC: 0.5158 [0.5011, 0.5310]
Skipping Shenzhen Peds: only 50 pediatric cases.
VinDr-PCXR - AUROC: 0.5675 [0.5549, 0.5789]


In [None]:
import pandas as pd
import ast
import numpy as np
from sklearn import metrics

def categorize_age(age):
    """Map an age in years to an ordinal age-bracket code.

    Brackets: (0, 20] -> 0, (20, 40] -> 1, (40, 60] -> 2, (60, 80] -> 3,
    above 80 -> 4.

    Args:
        age: Anything ``float()`` accepts (a number or a numeric string).

    Returns:
        The bracket code as an int, or ``None`` when ``age`` cannot be
        converted to a float, is NaN, or is not strictly positive.
    """
    try:
        age = float(age)
    except (TypeError, ValueError, OverflowError):
        # Narrowed from a bare ``except`` so that unrelated exceptions
        # (e.g. KeyboardInterrupt) are no longer silently swallowed.
        return None

    # ``not age > 0`` also rejects NaN, for which every comparison is False.
    # NOTE(review): an age of exactly 0 is treated as invalid -- confirm that
    # no dataset encodes infants under one year as 0.
    if not age > 0:
        return None

    # Contiguous upper bounds: fractional ages such as 20.5 or 80.5 used to
    # fall between the integer-only brackets and return None. Integer ages
    # map exactly as before.
    if age <= 20:
        return 0
    if age <= 40:
        return 1
    if age <= 60:
        return 2
    if age <= 80:
        return 3
    return 4

def bootstrap_auroc(y_true, y_score, n_bootstraps=1000, seed=42):
    """Estimate a 95% percentile-bootstrap confidence interval for the AUROC.

    Samples are drawn with replacement ``n_bootstraps`` times. Resamples that
    contain a single class are skipped because the AUROC is undefined there.

    Args:
        y_true: Array-like of binary labels.
        y_score: Array-like of scores aligned with ``y_true``.
        n_bootstraps: Number of resamples to draw.
        seed: Seed for the resampling RNG.

    Returns:
        ``(lower, upper)``: the 2.5th and 97.5th percentiles of the resampled
        AUROCs, or ``(nan, nan)`` when no valid resample exists (empty input,
        a single class, or ``n_bootstraps == 0``).
    """
    y_true = np.asarray(y_true)
    y_score = np.asarray(y_score)
    n_samples = len(y_true)
    if n_samples == 0:
        # rng.randint(0, 0, 0) would raise; an empty sample has no CI.
        return float('nan'), float('nan')

    rng = np.random.RandomState(seed)
    bootstrapped_scores = []
    for _ in range(n_bootstraps):
        indices = rng.randint(0, n_samples, n_samples)
        if len(np.unique(y_true[indices])) < 2:
            continue  # skip if only one class present in the sample
        bootstrapped_scores.append(metrics.roc_auc_score(y_true[indices], y_score[indices]))

    if not bootstrapped_scores:
        # Every resample was single-class: report an undefined interval
        # instead of taking percentiles of an empty list.
        return float('nan'), float('nan')

    # np.percentile orders the data itself, so no explicit sort is needed.
    lower, upper = np.percentile(bootstrapped_scores, [2.5, 97.5])
    return lower, upper

def evaluate_peds_csv(csv_file_path, label_name):
    """Print the sex-prediction AUROC (95% bootstrap CI) for patients younger than 12.

    For the 'VinDr-PCXR' label, rows are de-duplicated on 'Path' and ages are
    read from strings such as '5Y'. For every other label the age column is
    coerced to numeric. Files with 50 or fewer remaining rows are skipped.

    Args:
        csv_file_path: CSV holding a 'Gender_Probability' column of list
            literals, a sex column and an age column.
        label_name: Display name used in the printed line; the exact value
            'VinDr-PCXR' switches on the VinDr handling.

    Returns:
        None. The result, skip notice or error message is printed.
        FileNotFoundError, KeyError and ValueError are caught and reported.
    """
    age_columns = ['Age', 'PatientAge', 'age', 'patient_age', 'Patient Age']
    gender_columns = ['Gender', 'sex', 'Sex', 'PatientSex', 'gender']

    def valid_age(age):
        """Return True for age strings of the form '<int>Y' with a value below 12."""
        # NOTE(review): ages with any other suffix are rejected, so infants
        # recorded in months/weeks/days (if the data has them) are excluded
        # from this "< 12 years" cohort -- confirm this is intended.
        if isinstance(age, str) and age.endswith('Y'):
            try:
                return int(age[:-1]) < 12
            except ValueError:
                return False
        return False

    try:
        # low_memory=False parses each column in one pass, which avoids the
        # mixed-dtype DtypeWarning raised on the larger files.
        predict_df = pd.read_csv(csv_file_path, low_memory=False)
        predict_df['Gender_Probability'] = predict_df['Gender_Probability'].apply(ast.literal_eval)

        is_vindr = label_name == 'VinDr-PCXR'
        if is_vindr:
            predict_df = predict_df.drop_duplicates(subset=['Path'], keep='first')

        age_col = next((c for c in age_columns if c in predict_df.columns), None)
        if age_col is None:
            # Previously this surfaced as an opaque
            # KeyError: 'Patient Age Category'.
            raise KeyError("No recognized age column found.")

        if is_vindr:
            keep = predict_df[age_col].apply(valid_age).astype(bool)
        else:
            predict_df[age_col] = pd.to_numeric(predict_df[age_col], errors='coerce')
            keep = predict_df[age_col] < 12  # NaN ages compare False and are dropped

        # .copy() makes the filtered frame independent of the original so the
        # column assignment below cannot trigger SettingWithCopyWarning.
        predict_df = predict_df[keep].copy()
        predict_df['Patient Age Category'] = 0

        if len(predict_df) <= 50:
            print(f"Skipping {label_name}: only {len(predict_df)} pediatric cases.")
            return

        # --- Gender label ---
        gender_col = next((c for c in gender_columns if c in predict_df.columns), None)
        if gender_col is None:
            raise KeyError("No gender column found.")

        y_true = predict_df[gender_col].replace({'Male': 'M', 'Female': 'F'})
        y_true = y_true.map({'F': 1, 'M': 0})  # any other value becomes NaN
        # NOTE(review): element 0 of each probability list is used as the
        # score for the positive (female) class -- confirm the model's
        # output order.
        y_score = np.array([prob[0] for prob in predict_df['Gender_Probability']])

        mask = y_true.notna()
        y_true = y_true[mask]
        y_score = y_score[mask.to_numpy()]

        auroc = metrics.roc_auc_score(y_true, y_score)
        lower_ci, upper_ci = bootstrap_auroc(y_true, y_score)
        print(f"{label_name} (n={len(y_true)}): AUROC: {auroc:.4f} [{lower_ci:.4f}, {upper_ci:.4f}]")

    except (FileNotFoundError, KeyError, ValueError) as e:
        print(f"Error processing {csv_file_path}: {e}")



# Prediction CSVs evaluated under the MIMIC header, written as a
# path -> display-label mapping and flattened to (path, label) pairs in
# the desired reporting order.
csv_files_with_labels = list({
    '/content/mimic_gender_chexpert_test.csv': 'CheXpert Peds',
    '/content/mimic_gender_mimic_test.csv': 'MIMIC Peds',
    '/content/mimic_gender_nih_test.csv': 'NIH Peds',
    '/content/mimic_gender_brax_test_full.csv': 'BRAX Peds',
    '/content/mimic_gender_jsrt_test.csv': 'JSRT Peds',
    '/content/mimic_gender_padchest_test_full.csv': 'PadChest Peds',
    '/content/mimic_gender_shenzhen_test.csv': 'Shenzhen Peds',
    '/content/mimic_gender_vindr_peds_test.csv': 'VinDr-PCXR',
}.items())
# One printed line (result, skip notice or error) per file.
for file_path, label in csv_files_with_labels:
    evaluate_peds_csv(file_path, label)

Skipping CheXpert Peds: only 0 pediatric cases.
Skipping MIMIC Peds: only 0 pediatric cases.
NIH Peds (n=394): AUROC: 0.5491 [0.4929, 0.6060]
BRAX Peds (n=2491): AUROC: 0.5411 [0.5175, 0.5631]
Skipping JSRT Peds: only 0 pediatric cases.


  predict_df = pd.read_csv(csv_file_path)


PadChest Peds (n=4288): AUROC: 0.4930 [0.4756, 0.5105]
Skipping Shenzhen Peds: only 21 pediatric cases.
VinDr-PCXR (n=5295): AUROC: 0.5946 [0.5802, 0.6101]


In [None]:
import pandas as pd
import ast
import numpy as np
from sklearn import metrics

def categorize_age(age):
    """Map an age in years to an ordinal age-bracket code.

    Brackets: (0, 20] -> 0, (20, 40] -> 1, (40, 60] -> 2, (60, 80] -> 3,
    above 80 -> 4.

    Args:
        age: Anything ``float()`` accepts (a number or a numeric string).

    Returns:
        The bracket code as an int, or ``None`` when ``age`` cannot be
        converted to a float, is NaN, or is not strictly positive.
    """
    try:
        age = float(age)
    except (TypeError, ValueError, OverflowError):
        # Narrowed from a bare ``except`` so that unrelated exceptions
        # (e.g. KeyboardInterrupt) are no longer silently swallowed.
        return None

    # ``not age > 0`` also rejects NaN, for which every comparison is False.
    # NOTE(review): an age of exactly 0 is treated as invalid -- confirm that
    # no dataset encodes infants under one year as 0.
    if not age > 0:
        return None

    # Contiguous upper bounds: fractional ages such as 20.5 or 80.5 used to
    # fall between the integer-only brackets and return None. Integer ages
    # map exactly as before.
    if age <= 20:
        return 0
    if age <= 40:
        return 1
    if age <= 60:
        return 2
    if age <= 80:
        return 3
    return 4

def bootstrap_auroc(y_true, y_score, n_bootstraps=1000, seed=42):
    """Estimate a 95% percentile-bootstrap confidence interval for the AUROC.

    Samples are drawn with replacement ``n_bootstraps`` times. Resamples that
    contain a single class are skipped because the AUROC is undefined there.

    Args:
        y_true: Array-like of binary labels.
        y_score: Array-like of scores aligned with ``y_true``.
        n_bootstraps: Number of resamples to draw.
        seed: Seed for the resampling RNG.

    Returns:
        ``(lower, upper)``: the 2.5th and 97.5th percentiles of the resampled
        AUROCs, or ``(nan, nan)`` when no valid resample exists (empty input,
        a single class, or ``n_bootstraps == 0``).
    """
    y_true = np.asarray(y_true)
    y_score = np.asarray(y_score)
    n_samples = len(y_true)
    if n_samples == 0:
        # rng.randint(0, 0, 0) would raise; an empty sample has no CI.
        return float('nan'), float('nan')

    rng = np.random.RandomState(seed)
    bootstrapped_scores = []
    for _ in range(n_bootstraps):
        indices = rng.randint(0, n_samples, n_samples)
        if len(np.unique(y_true[indices])) < 2:
            continue  # skip if only one class present in the sample
        bootstrapped_scores.append(metrics.roc_auc_score(y_true[indices], y_score[indices]))

    if not bootstrapped_scores:
        # Every resample was single-class: report an undefined interval
        # instead of taking percentiles of an empty list.
        return float('nan'), float('nan')

    # np.percentile orders the data itself, so no explicit sort is needed.
    lower, upper = np.percentile(bootstrapped_scores, [2.5, 97.5])
    return lower, upper

def evaluate_peds_csv(csv_file_path, label_name):
    """Print the sex-prediction AUROC (95% bootstrap CI) for patients aged 12 and over in age bracket 0.

    For the 'VinDr-PCXR' label, rows are de-duplicated on 'Path', ages are
    read from strings such as '13Y', and every row aged 12 or more is kept.
    For every other label the age column is coerced to numeric, rows below
    12 are dropped, and only bracket 0 of ``categorize_age`` is retained.
    Files with 50 or fewer remaining rows are skipped.

    Args:
        csv_file_path: CSV holding a 'Gender_Probability' column of list
            literals, a sex column and an age column.
        label_name: Display name used in the printed line; the exact value
            'VinDr-PCXR' switches on the VinDr handling.

    Returns:
        None. The result, skip notice or error message is printed.
        FileNotFoundError, KeyError and ValueError are caught and reported.
    """
    age_columns = ['Age', 'PatientAge', 'age', 'patient_age', 'Patient Age']
    gender_columns = ['Gender', 'sex', 'Sex', 'PatientSex', 'gender']

    def valid_age(age):
        """Return True for age strings of the form '<int>Y' with a value of 12 or more."""
        if isinstance(age, str) and age.endswith('Y'):
            try:
                # Remove the 'Y' and convert the remaining part to an integer
                return int(age[:-1]) >= 12
            except ValueError:
                return False
        return False

    try:
        # low_memory=False parses each column in one pass, which avoids the
        # mixed-dtype DtypeWarning raised on the larger files.
        predict_df = pd.read_csv(csv_file_path, low_memory=False)
        predict_df['Gender_Probability'] = predict_df['Gender_Probability'].apply(ast.literal_eval)

        # --- Handle Age Column ---
        is_vindr = label_name == 'VinDr-PCXR'
        if is_vindr:
            predict_df = predict_df.drop_duplicates(subset=['Path'], keep='first')

        age_col = next((c for c in age_columns if c in predict_df.columns), None)
        if age_col is None:
            # Previously this surfaced as an opaque
            # KeyError: 'Patient Age Category'.
            raise KeyError("No recognized age column found.")

        # .copy() after each filter makes the frame independent of the
        # original so the column assignment cannot trigger
        # SettingWithCopyWarning.
        if is_vindr:
            keep = predict_df[age_col].apply(valid_age).astype(bool)
            predict_df = predict_df[keep].copy()
            predict_df['Patient Age Category'] = 0
        else:
            predict_df[age_col] = pd.to_numeric(predict_df[age_col], errors='coerce')
            predict_df = predict_df[predict_df[age_col] >= 12].copy()  # NaN ages are dropped
            predict_df['Patient Age Category'] = predict_df[age_col].apply(categorize_age)

        # Keep only age bracket 0
        predict_df = predict_df[predict_df['Patient Age Category'] == 0]
        if len(predict_df) <= 50:
            print(f"Skipping {label_name}: only {len(predict_df)} pediatric cases.")
            return

        # --- Gender label ---
        gender_col = next((c for c in gender_columns if c in predict_df.columns), None)
        if gender_col is None:
            raise KeyError("No gender column found.")

        y_true = predict_df[gender_col].replace({'Male': 'M', 'Female': 'F'})
        y_true = y_true.map({'F': 1, 'M': 0})  # any other value becomes NaN
        # NOTE(review): element 0 of each probability list is used as the
        # score for the positive (female) class -- confirm the model's
        # output order.
        y_score = np.array([prob[0] for prob in predict_df['Gender_Probability']])

        mask = y_true.notna()
        y_true = y_true[mask]
        y_score = y_score[mask.to_numpy()]

        auroc = metrics.roc_auc_score(y_true, y_score)
        lower_ci, upper_ci = bootstrap_auroc(y_true, y_score)
        print(f"{label_name} (n={len(y_true)}): AUROC: {auroc:.4f} [{lower_ci:.4f}, {upper_ci:.4f}]")

    except (FileNotFoundError, KeyError, ValueError) as e:
        print(f"Error processing {csv_file_path}: {e}")



# Prediction CSVs evaluated under the MIMIC header, written as a
# path -> display-label mapping and flattened to (path, label) pairs in
# the desired reporting order.
csv_files_with_labels = list({
    '/content/mimic_gender_chexpert_test.csv': 'CheXpert Peds',
    '/content/mimic_gender_mimic_test.csv': 'MIMIC Peds',
    '/content/mimic_gender_nih_test.csv': 'NIH Peds',
    '/content/mimic_gender_brax_test_full.csv': 'BRAX Peds',
    '/content/mimic_gender_jsrt_test.csv': 'JSRT Peds',
    '/content/mimic_gender_padchest_test_full.csv': 'PadChest Peds',
    '/content/mimic_gender_shenzhen_test.csv': 'Shenzhen Peds',
    '/content/mimic_gender_vindr_peds_test.csv': 'VinDr-PCXR',
}.items())
# One printed line (result, skip notice or error) per file.
for file_path, label in csv_files_with_labels:
    evaluate_peds_csv(file_path, label)

CheXpert Peds (n=541): AUROC: 0.9685 [0.9566, 0.9796]
MIMIC Peds (n=215): AUROC: 0.9987 [0.9964, 1.0000]
NIH Peds (n=1135): AUROC: 0.9286 [0.9123, 0.9434]
BRAX Peds (n=1065): AUROC: 0.9770 [0.9688, 0.9840]
Skipping JSRT Peds: only 2 pediatric cases.


  predict_df = pd.read_csv(csv_file_path)


PadChest Peds (n=2190): AUROC: 0.5518 [0.5278, 0.5759]
Skipping Shenzhen Peds: only 29 pediatric cases.
VinDr-PCXR (n=63): AUROC: 0.9323 [0.8636, 0.9808]


# NIH

In [None]:
import pandas as pd
import numpy as np
import ast
from sklearn import metrics

def categorize_age(age):
    """Map an age in years to an ordinal age-bracket code.

    Brackets: (0, 20] -> 0, (20, 40] -> 1, (40, 60] -> 2, (60, 80] -> 3,
    above 80 -> 4.

    Args:
        age: Anything ``float()`` accepts (a number or a numeric string).

    Returns:
        The bracket code as an int, or ``None`` when ``age`` cannot be
        converted to a float, is NaN, or is not strictly positive.
    """
    try:
        age = float(age)
    except (TypeError, ValueError, OverflowError):
        # Narrowed from a bare ``except`` so that unrelated exceptions
        # (e.g. KeyboardInterrupt) are no longer silently swallowed.
        return None

    # ``not age > 0`` also rejects NaN, for which every comparison is False.
    # NOTE(review): an age of exactly 0 is treated as invalid -- confirm that
    # no dataset encodes infants under one year as 0.
    if not age > 0:
        return None

    # Contiguous upper bounds: fractional ages such as 20.5 or 80.5 used to
    # fall between the integer-only brackets and return None. Integer ages
    # map exactly as before.
    if age <= 20:
        return 0
    if age <= 40:
        return 1
    if age <= 60:
        return 2
    if age <= 80:
        return 3
    return 4

def bootstrap_auc_for_csv(csv_file_path, label_name, n_bootstrap=1000, seed=2025):
    """Print the sex-prediction AUROC with a 95% bootstrap CI for one prediction CSV.

    Rows are restricted to age bracket 0 of ``categorize_age``. For the
    'VinDr-PCXR' label every row (de-duplicated on 'Path') is kept instead.
    Files with 50 or fewer remaining rows are skipped with a notice.

    Args:
        csv_file_path: CSV holding a 'Gender_Probability' column of list
            literals, a sex column and (except for VinDr-PCXR) an age column.
        label_name: Display name used in the printed line; the exact value
            'VinDr-PCXR' switches on the VinDr handling.
        n_bootstrap: Number of bootstrap resamples.
        seed: Seed for the resampling RNG.

    Returns:
        None. The result, skip notice or error message is printed.
        FileNotFoundError, KeyError and ValueError are caught and reported.
    """
    try:
        # low_memory=False parses each column in one pass, which avoids the
        # mixed-dtype DtypeWarning raised on the larger files.
        predict_df = pd.read_csv(csv_file_path, low_memory=False)
        predict_df['Gender_Probability'] = predict_df['Gender_Probability'].apply(ast.literal_eval)

        # Handle Age Category
        if label_name == 'VinDr-PCXR':
            predict_df['Patient Age Category'] = 0
            predict_df = predict_df.drop_duplicates(subset=['Path'], keep='first')
        else:
            for col in ['Age', 'PatientAge', 'age', 'patient_age', 'Patient Age']:
                if col in predict_df.columns:
                    predict_df[col] = pd.to_numeric(predict_df[col], errors='coerce')
                    predict_df['Patient Age Category'] = predict_df[col].apply(categorize_age)
                    break
            else:
                # Previously this surfaced later as an opaque
                # KeyError: 'Patient Age Category'.
                raise KeyError("No recognized age column found.")

        # Filter pediatric cases
        predict_df = predict_df[predict_df['Patient Age Category'] == 0]
        if len(predict_df) <= 50:
            print(f"Skipping {label_name}: only {len(predict_df)} pediatric cases.")
            return

        # Ground Truth Gender
        for col in ['Gender', 'sex', 'Sex', 'PatientSex', 'gender']:
            if col in predict_df.columns:
                y_true = predict_df[col]
                break
        else:
            raise KeyError("No recognized gender column found.")

        y_true = y_true.replace({'Male': 'M', 'Female': 'F'})
        y_true = y_true.map({'F': 1, 'M': 0})  # Female = 1, Male = 0
        mask = y_true.notna()  # rows with any other sex value are dropped

        y_true = y_true[mask].astype(int).values
        # NOTE(review): element 0 of each probability list is used as the
        # score for the positive (female) class -- confirm the model's
        # output order.
        y_score = predict_df.loc[mask, 'Gender_Probability'].apply(lambda x: x[0]).values

        # Point estimate first: it raises ValueError (reported below) when
        # only one class is present, before any resampling is done.
        auroc = metrics.roc_auc_score(y_true, y_score)

        # Bootstrap AUROC
        rng = np.random.RandomState(seed)
        bootstrapped_scores = []
        n_samples = len(y_true)
        for _ in range(n_bootstrap):
            indices = rng.randint(0, n_samples, n_samples)
            if len(np.unique(y_true[indices])) < 2:
                continue  # skip if not both classes present
            bootstrapped_scores.append(metrics.roc_auc_score(y_true[indices], y_score[indices]))

        if bootstrapped_scores:
            lower, upper = np.percentile(bootstrapped_scores, [2.5, 97.5])
        else:
            # No usable resample (e.g. n_bootstrap=0): report an undefined CI
            # instead of taking percentiles of an empty list.
            lower = upper = float('nan')

        print(f"{label_name} - AUROC: {auroc:.4f} [{lower:.4f}, {upper:.4f}]")

    except (FileNotFoundError, KeyError, ValueError) as e:
        print(f"Error processing {csv_file_path}: {e}")
# Prediction CSVs evaluated under the NIH header, written as a
# path -> display-label mapping and flattened to (path, label) pairs in
# reporting order.
csv_files_with_labels = list({
    '/content/nih_gender_chexpert_test.csv': 'CheXpert Peds',
    '/content/nih_gender_mimic_test.csv': 'MIMIC Peds',
    '/content/nih_gender_nih_test.csv': 'NIH Peds',
    '/content/nih_gender_brax_test_full.csv': 'BRAX Peds',
    '/content/nih_gender_jsrt_test.csv': 'JSRT Peds',
    '/content/nih_gender_padchest_test_full.csv': 'PadChest Peds',
    '/content/nih_gender_shenzhen_test.csv': 'Shenzhen Peds',
    '/content/nih_gender_vindr_peds_test.csv': 'VinDr-PCXR',
}.items())

# One printed line (result, skip notice or error) per file.
for file_path, label in csv_files_with_labels:
    bootstrap_auc_for_csv(file_path, label)


CheXpert Peds - AUROC: 0.9573 [0.9384, 0.9722]
MIMIC Peds - AUROC: 0.9894 [0.9800, 0.9972]
NIH Peds - AUROC: 0.9072 [0.8932, 0.9220]
BRAX Peds - AUROC: 0.8823 [0.8669, 0.8957]
Skipping JSRT Peds: only 2 pediatric cases.


  predict_df = pd.read_csv(csv_file_path)


PadChest Peds - AUROC: 0.4962 [0.4809, 0.5115]
Skipping Shenzhen Peds: only 50 pediatric cases.
VinDr-PCXR - AUROC: 0.6049 [0.5933, 0.6158]


In [None]:
import pandas as pd
import ast
import numpy as np
from sklearn import metrics

def categorize_age(age):
    """Map an age in years to an ordinal age-bracket code.

    Brackets: (0, 20] -> 0, (20, 40] -> 1, (40, 60] -> 2, (60, 80] -> 3,
    above 80 -> 4.

    Args:
        age: Anything ``float()`` accepts (a number or a numeric string).

    Returns:
        The bracket code as an int, or ``None`` when ``age`` cannot be
        converted to a float, is NaN, or is not strictly positive.
    """
    try:
        age = float(age)
    except (TypeError, ValueError, OverflowError):
        # Narrowed from a bare ``except`` so that unrelated exceptions
        # (e.g. KeyboardInterrupt) are no longer silently swallowed.
        return None

    # ``not age > 0`` also rejects NaN, for which every comparison is False.
    # NOTE(review): an age of exactly 0 is treated as invalid -- confirm that
    # no dataset encodes infants under one year as 0.
    if not age > 0:
        return None

    # Contiguous upper bounds: fractional ages such as 20.5 or 80.5 used to
    # fall between the integer-only brackets and return None. Integer ages
    # map exactly as before.
    if age <= 20:
        return 0
    if age <= 40:
        return 1
    if age <= 60:
        return 2
    if age <= 80:
        return 3
    return 4

def bootstrap_auroc(y_true, y_score, n_bootstraps=1000, seed=42):
    """Estimate a 95% percentile-bootstrap confidence interval for the AUROC.

    Samples are drawn with replacement ``n_bootstraps`` times. Resamples that
    contain a single class are skipped because the AUROC is undefined there.

    Args:
        y_true: Array-like of binary labels.
        y_score: Array-like of scores aligned with ``y_true``.
        n_bootstraps: Number of resamples to draw.
        seed: Seed for the resampling RNG.

    Returns:
        ``(lower, upper)``: the 2.5th and 97.5th percentiles of the resampled
        AUROCs, or ``(nan, nan)`` when no valid resample exists (empty input,
        a single class, or ``n_bootstraps == 0``).
    """
    y_true = np.asarray(y_true)
    y_score = np.asarray(y_score)
    n_samples = len(y_true)
    if n_samples == 0:
        # rng.randint(0, 0, 0) would raise; an empty sample has no CI.
        return float('nan'), float('nan')

    rng = np.random.RandomState(seed)
    bootstrapped_scores = []
    for _ in range(n_bootstraps):
        indices = rng.randint(0, n_samples, n_samples)
        if len(np.unique(y_true[indices])) < 2:
            continue  # skip if only one class present in the sample
        bootstrapped_scores.append(metrics.roc_auc_score(y_true[indices], y_score[indices]))

    if not bootstrapped_scores:
        # Every resample was single-class: report an undefined interval
        # instead of taking percentiles of an empty list.
        return float('nan'), float('nan')

    # np.percentile orders the data itself, so no explicit sort is needed.
    lower, upper = np.percentile(bootstrapped_scores, [2.5, 97.5])
    return lower, upper

def evaluate_peds_csv(csv_file_path, label_name):
    """Print the sex-prediction AUROC (95% bootstrap CI) for patients younger than 12.

    For the 'VinDr-PCXR' label, rows are de-duplicated on 'Path' and ages are
    read from strings such as '5Y'. For every other label the age column is
    coerced to numeric. Files with 50 or fewer remaining rows are skipped.

    Args:
        csv_file_path: CSV holding a 'Gender_Probability' column of list
            literals, a sex column and an age column.
        label_name: Display name used in the printed line; the exact value
            'VinDr-PCXR' switches on the VinDr handling.

    Returns:
        None. The result, skip notice or error message is printed.
        FileNotFoundError, KeyError and ValueError are caught and reported.
    """
    age_columns = ['Age', 'PatientAge', 'age', 'patient_age', 'Patient Age']
    gender_columns = ['Gender', 'sex', 'Sex', 'PatientSex', 'gender']

    def valid_age(age):
        """Return True for age strings of the form '<int>Y' with a value below 12."""
        # NOTE(review): ages with any other suffix are rejected, so infants
        # recorded in months/weeks/days (if the data has them) are excluded
        # from this "< 12 years" cohort -- confirm this is intended.
        if isinstance(age, str) and age.endswith('Y'):
            try:
                return int(age[:-1]) < 12
            except ValueError:
                return False
        return False

    try:
        # low_memory=False parses each column in one pass, which avoids the
        # mixed-dtype DtypeWarning raised on the larger files.
        predict_df = pd.read_csv(csv_file_path, low_memory=False)
        predict_df['Gender_Probability'] = predict_df['Gender_Probability'].apply(ast.literal_eval)

        is_vindr = label_name == 'VinDr-PCXR'
        if is_vindr:
            predict_df = predict_df.drop_duplicates(subset=['Path'], keep='first')

        age_col = next((c for c in age_columns if c in predict_df.columns), None)
        if age_col is None:
            # Previously this surfaced as an opaque
            # KeyError: 'Patient Age Category'.
            raise KeyError("No recognized age column found.")

        if is_vindr:
            keep = predict_df[age_col].apply(valid_age).astype(bool)
        else:
            predict_df[age_col] = pd.to_numeric(predict_df[age_col], errors='coerce')
            keep = predict_df[age_col] < 12  # NaN ages compare False and are dropped

        # .copy() makes the filtered frame independent of the original so the
        # column assignment below cannot trigger SettingWithCopyWarning.
        predict_df = predict_df[keep].copy()
        predict_df['Patient Age Category'] = 0

        if len(predict_df) <= 50:
            print(f"Skipping {label_name}: only {len(predict_df)} pediatric cases.")
            return

        # --- Gender label ---
        gender_col = next((c for c in gender_columns if c in predict_df.columns), None)
        if gender_col is None:
            raise KeyError("No gender column found.")

        y_true = predict_df[gender_col].replace({'Male': 'M', 'Female': 'F'})
        y_true = y_true.map({'F': 1, 'M': 0})  # any other value becomes NaN
        # NOTE(review): element 0 of each probability list is used as the
        # score for the positive (female) class -- confirm the model's
        # output order.
        y_score = np.array([prob[0] for prob in predict_df['Gender_Probability']])

        mask = y_true.notna()
        y_true = y_true[mask]
        y_score = y_score[mask.to_numpy()]

        auroc = metrics.roc_auc_score(y_true, y_score)
        lower_ci, upper_ci = bootstrap_auroc(y_true, y_score)
        print(f"{label_name} (n={len(y_true)}): AUROC: {auroc:.4f} [{lower_ci:.4f}, {upper_ci:.4f}]")

    except (FileNotFoundError, KeyError, ValueError) as e:
        print(f"Error processing {csv_file_path}: {e}")



# Prediction CSVs evaluated under the NIH header, written as a
# path -> display-label mapping and flattened to (path, label) pairs in
# the desired reporting order.
csv_files_with_labels = list({
    '/content/nih_gender_chexpert_test.csv': 'CheXpert Peds',
    '/content/nih_gender_mimic_test.csv': 'MIMIC Peds',
    '/content/nih_gender_nih_test.csv': 'NIH Peds',
    '/content/nih_gender_brax_test_full.csv': 'BRAX Peds',
    '/content/nih_gender_jsrt_test.csv': 'JSRT Peds',
    '/content/nih_gender_padchest_test_full.csv': 'PadChest Peds',
    '/content/nih_gender_shenzhen_test.csv': 'Shenzhen Peds',
    '/content/nih_gender_vindr_peds_test.csv': 'VinDr-PCXR',
}.items())
# One printed line (result, skip notice or error) per file.
for file_path, label in csv_files_with_labels:
    evaluate_peds_csv(file_path, label)

Skipping CheXpert Peds: only 0 pediatric cases.
Skipping MIMIC Peds: only 0 pediatric cases.
NIH Peds (n=394): AUROC: 0.5420 [0.4792, 0.5997]
BRAX Peds (n=2491): AUROC: 0.5966 [0.5738, 0.6188]
Skipping JSRT Peds: only 0 pediatric cases.


  predict_df = pd.read_csv(csv_file_path)


PadChest Peds (n=4288): AUROC: 0.4997 [0.4821, 0.5173]
Skipping Shenzhen Peds: only 21 pediatric cases.
VinDr-PCXR (n=5295): AUROC: 0.6264 [0.6118, 0.6413]


In [None]:
import pandas as pd
import ast
import numpy as np
from sklearn import metrics

def categorize_age(age):
    """Map an age in years to an ordinal age-bracket code.

    Brackets: (0, 20] -> 0, (20, 40] -> 1, (40, 60] -> 2, (60, 80] -> 3,
    above 80 -> 4.

    Args:
        age: Anything ``float()`` accepts (a number or a numeric string).

    Returns:
        The bracket code as an int, or ``None`` when ``age`` cannot be
        converted to a float, is NaN, or is not strictly positive.
    """
    try:
        age = float(age)
    except (TypeError, ValueError, OverflowError):
        # Narrowed from a bare ``except`` so that unrelated exceptions
        # (e.g. KeyboardInterrupt) are no longer silently swallowed.
        return None

    # ``not age > 0`` also rejects NaN, for which every comparison is False.
    # NOTE(review): an age of exactly 0 is treated as invalid -- confirm that
    # no dataset encodes infants under one year as 0.
    if not age > 0:
        return None

    # Contiguous upper bounds: fractional ages such as 20.5 or 80.5 used to
    # fall between the integer-only brackets and return None. Integer ages
    # map exactly as before.
    if age <= 20:
        return 0
    if age <= 40:
        return 1
    if age <= 60:
        return 2
    if age <= 80:
        return 3
    return 4

def bootstrap_auroc(y_true, y_score, n_bootstraps=1000, seed=42):
    """Estimate a 95% percentile-bootstrap confidence interval for the AUROC.

    Samples are drawn with replacement ``n_bootstraps`` times. Resamples that
    contain a single class are skipped because the AUROC is undefined there.

    Args:
        y_true: Array-like of binary labels.
        y_score: Array-like of scores aligned with ``y_true``.
        n_bootstraps: Number of resamples to draw.
        seed: Seed for the resampling RNG.

    Returns:
        ``(lower, upper)``: the 2.5th and 97.5th percentiles of the resampled
        AUROCs, or ``(nan, nan)`` when no valid resample exists (empty input,
        a single class, or ``n_bootstraps == 0``).
    """
    y_true = np.asarray(y_true)
    y_score = np.asarray(y_score)
    n_samples = len(y_true)
    if n_samples == 0:
        # rng.randint(0, 0, 0) would raise; an empty sample has no CI.
        return float('nan'), float('nan')

    rng = np.random.RandomState(seed)
    bootstrapped_scores = []
    for _ in range(n_bootstraps):
        indices = rng.randint(0, n_samples, n_samples)
        if len(np.unique(y_true[indices])) < 2:
            continue  # skip if only one class present in the sample
        bootstrapped_scores.append(metrics.roc_auc_score(y_true[indices], y_score[indices]))

    if not bootstrapped_scores:
        # Every resample was single-class: report an undefined interval
        # instead of taking percentiles of an empty list.
        return float('nan'), float('nan')

    # np.percentile orders the data itself, so no explicit sort is needed.
    lower, upper = np.percentile(bootstrapped_scores, [2.5, 97.5])
    return lower, upper

def evaluate_peds_csv(csv_file_path, label_name):
    """Print the sex-prediction AUROC (95% bootstrap CI) for patients aged 12 and over in age bracket 0.

    For the 'VinDr-PCXR' label, rows are de-duplicated on 'Path', ages are
    read from strings such as '13Y', and every row aged 12 or more is kept.
    For every other label the age column is coerced to numeric, rows below
    12 are dropped, and only bracket 0 of ``categorize_age`` is retained.
    Files with 50 or fewer remaining rows are skipped.

    Args:
        csv_file_path: CSV holding a 'Gender_Probability' column of list
            literals, a sex column and an age column.
        label_name: Display name used in the printed line; the exact value
            'VinDr-PCXR' switches on the VinDr handling.

    Returns:
        None. The result, skip notice or error message is printed.
        FileNotFoundError, KeyError and ValueError are caught and reported.
    """
    age_columns = ['Age', 'PatientAge', 'age', 'patient_age', 'Patient Age']
    gender_columns = ['Gender', 'sex', 'Sex', 'PatientSex', 'gender']

    def valid_age(age):
        """Return True for age strings of the form '<int>Y' with a value of 12 or more."""
        if isinstance(age, str) and age.endswith('Y'):
            try:
                # Remove the 'Y' and convert the remaining part to an integer
                return int(age[:-1]) >= 12
            except ValueError:
                return False
        return False

    try:
        # low_memory=False parses each column in one pass, which avoids the
        # mixed-dtype DtypeWarning raised on the larger files.
        predict_df = pd.read_csv(csv_file_path, low_memory=False)
        predict_df['Gender_Probability'] = predict_df['Gender_Probability'].apply(ast.literal_eval)

        # --- Handle Age Column ---
        is_vindr = label_name == 'VinDr-PCXR'
        if is_vindr:
            predict_df = predict_df.drop_duplicates(subset=['Path'], keep='first')

        age_col = next((c for c in age_columns if c in predict_df.columns), None)
        if age_col is None:
            # Previously this surfaced as an opaque
            # KeyError: 'Patient Age Category'.
            raise KeyError("No recognized age column found.")

        # .copy() after each filter makes the frame independent of the
        # original so the column assignment cannot trigger
        # SettingWithCopyWarning.
        if is_vindr:
            keep = predict_df[age_col].apply(valid_age).astype(bool)
            predict_df = predict_df[keep].copy()
            predict_df['Patient Age Category'] = 0
        else:
            predict_df[age_col] = pd.to_numeric(predict_df[age_col], errors='coerce')
            predict_df = predict_df[predict_df[age_col] >= 12].copy()  # NaN ages are dropped
            predict_df['Patient Age Category'] = predict_df[age_col].apply(categorize_age)

        # Keep only age bracket 0
        predict_df = predict_df[predict_df['Patient Age Category'] == 0]
        if len(predict_df) <= 50:
            print(f"Skipping {label_name}: only {len(predict_df)} pediatric cases.")
            return

        # --- Gender label ---
        gender_col = next((c for c in gender_columns if c in predict_df.columns), None)
        if gender_col is None:
            raise KeyError("No gender column found.")

        y_true = predict_df[gender_col].replace({'Male': 'M', 'Female': 'F'})
        y_true = y_true.map({'F': 1, 'M': 0})  # any other value becomes NaN
        # NOTE(review): element 0 of each probability list is used as the
        # score for the positive (female) class -- confirm the model's
        # output order.
        y_score = np.array([prob[0] for prob in predict_df['Gender_Probability']])

        mask = y_true.notna()
        y_true = y_true[mask]
        y_score = y_score[mask.to_numpy()]

        auroc = metrics.roc_auc_score(y_true, y_score)
        lower_ci, upper_ci = bootstrap_auroc(y_true, y_score)
        print(f"{label_name} (n={len(y_true)}): AUROC: {auroc:.4f} [{lower_ci:.4f}, {upper_ci:.4f}]")

    except (FileNotFoundError, KeyError, ValueError) as e:
        print(f"Error processing {csv_file_path}: {e}")



# Prediction CSVs evaluated under the NIH header, written as a
# path -> display-label mapping and flattened to (path, label) pairs in
# the desired reporting order.
csv_files_with_labels = list({
    '/content/nih_gender_chexpert_test.csv': 'CheXpert Peds',
    '/content/nih_gender_mimic_test.csv': 'MIMIC Peds',
    '/content/nih_gender_nih_test.csv': 'NIH Peds',
    '/content/nih_gender_brax_test_full.csv': 'BRAX Peds',
    '/content/nih_gender_jsrt_test.csv': 'JSRT Peds',
    '/content/nih_gender_padchest_test_full.csv': 'PadChest Peds',
    '/content/nih_gender_shenzhen_test.csv': 'Shenzhen Peds',
    '/content/nih_gender_vindr_peds_test.csv': 'VinDr-PCXR',
}.items())
# One printed line (result, skip notice or error) per file.
for file_path, label in csv_files_with_labels:
    evaluate_peds_csv(file_path, label)

CheXpert Peds (n=541): AUROC: 0.9573 [0.9398, 0.9723]
MIMIC Peds (n=215): AUROC: 0.9894 [0.9801, 0.9973]
NIH Peds (n=1135): AUROC: 0.9788 [0.9714, 0.9856]
BRAX Peds (n=1065): AUROC: 0.9881 [0.9825, 0.9928]
Skipping JSRT Peds: only 2 pediatric cases.


  predict_df = pd.read_csv(csv_file_path)


PadChest Peds (n=2190): AUROC: 0.4987 [0.4745, 0.5240]
Skipping Shenzhen Peds: only 29 pediatric cases.
VinDr-PCXR (n=63): AUROC: 0.9404 [0.8750, 0.9857]


# CheXpert

In [None]:
import pandas as pd
import numpy as np
import ast
from sklearn import metrics

def categorize_age(age):
    """Map an age in years to an ordinal age-bracket code.

    Brackets: (0, 20] -> 0, (20, 40] -> 1, (40, 60] -> 2, (60, 80] -> 3,
    above 80 -> 4.

    Args:
        age: Anything ``float()`` accepts (a number or a numeric string).

    Returns:
        The bracket code as an int, or ``None`` when ``age`` cannot be
        converted to a float, is NaN, or is not strictly positive.
    """
    try:
        age = float(age)
    except (TypeError, ValueError, OverflowError):
        # Narrowed from a bare ``except`` so that unrelated exceptions
        # (e.g. KeyboardInterrupt) are no longer silently swallowed.
        return None

    # ``not age > 0`` also rejects NaN, for which every comparison is False.
    # NOTE(review): an age of exactly 0 is treated as invalid -- confirm that
    # no dataset encodes infants under one year as 0.
    if not age > 0:
        return None

    # Contiguous upper bounds: fractional ages such as 20.5 or 80.5 used to
    # fall between the integer-only brackets and return None. Integer ages
    # map exactly as before.
    if age <= 20:
        return 0
    if age <= 40:
        return 1
    if age <= 60:
        return 2
    if age <= 80:
        return 3
    return 4

def bootstrap_auc_for_csv(csv_file_path, label_name, n_bootstrap=1000, seed=2025):
    """Print the sex-prediction AUROC with a 95% bootstrap CI for one prediction CSV.

    Rows are restricted to age bracket 0 of ``categorize_age``. For the
    'VinDr-PCXR' label every row (de-duplicated on 'Path') is kept instead.
    Files with 50 or fewer remaining rows are skipped with a notice.

    Args:
        csv_file_path: CSV holding a 'Gender_Probability' column of list
            literals, a sex column and (except for VinDr-PCXR) an age column.
        label_name: Display name used in the printed line; the exact value
            'VinDr-PCXR' switches on the VinDr handling.
        n_bootstrap: Number of bootstrap resamples.
        seed: Seed for the resampling RNG.

    Returns:
        None. The result, skip notice or error message is printed.
        FileNotFoundError, KeyError and ValueError are caught and reported.
    """
    try:
        # low_memory=False parses each column in one pass, which avoids the
        # mixed-dtype DtypeWarning raised on the larger files.
        predict_df = pd.read_csv(csv_file_path, low_memory=False)
        predict_df['Gender_Probability'] = predict_df['Gender_Probability'].apply(ast.literal_eval)

        # Handle Age Category
        if label_name == 'VinDr-PCXR':
            predict_df['Patient Age Category'] = 0
            predict_df = predict_df.drop_duplicates(subset=['Path'], keep='first')
        else:
            for col in ['Age', 'PatientAge', 'age', 'patient_age', 'Patient Age']:
                if col in predict_df.columns:
                    predict_df[col] = pd.to_numeric(predict_df[col], errors='coerce')
                    predict_df['Patient Age Category'] = predict_df[col].apply(categorize_age)
                    break
            else:
                # Previously this surfaced later as an opaque
                # KeyError: 'Patient Age Category'.
                raise KeyError("No recognized age column found.")

        # Filter pediatric cases
        predict_df = predict_df[predict_df['Patient Age Category'] == 0]
        if len(predict_df) <= 50:
            print(f"Skipping {label_name}: only {len(predict_df)} pediatric cases.")
            return

        # Ground Truth Gender
        for col in ['Gender', 'sex', 'Sex', 'PatientSex', 'gender']:
            if col in predict_df.columns:
                y_true = predict_df[col]
                break
        else:
            raise KeyError("No recognized gender column found.")

        y_true = y_true.replace({'Male': 'M', 'Female': 'F'})
        y_true = y_true.map({'F': 1, 'M': 0})  # Female = 1, Male = 0
        mask = y_true.notna()  # rows with any other sex value are dropped

        y_true = y_true[mask].astype(int).values
        # NOTE(review): element 0 of each probability list is used as the
        # score for the positive (female) class -- confirm the model's
        # output order.
        y_score = predict_df.loc[mask, 'Gender_Probability'].apply(lambda x: x[0]).values

        # Point estimate first: it raises ValueError (reported below) when
        # only one class is present, before any resampling is done.
        auroc = metrics.roc_auc_score(y_true, y_score)

        # Bootstrap AUROC
        rng = np.random.RandomState(seed)
        bootstrapped_scores = []
        n_samples = len(y_true)
        for _ in range(n_bootstrap):
            indices = rng.randint(0, n_samples, n_samples)
            if len(np.unique(y_true[indices])) < 2:
                continue  # skip if not both classes present
            bootstrapped_scores.append(metrics.roc_auc_score(y_true[indices], y_score[indices]))

        if bootstrapped_scores:
            lower, upper = np.percentile(bootstrapped_scores, [2.5, 97.5])
        else:
            # No usable resample (e.g. n_bootstrap=0): report an undefined CI
            # instead of taking percentiles of an empty list.
            lower = upper = float('nan')

        print(f"{label_name} - AUROC: {auroc:.4f} [{lower:.4f}, {upper:.4f}]")

    except (FileNotFoundError, KeyError, ValueError) as e:
        print(f"Error processing {csv_file_path}: {e}")
# Prediction CSVs evaluated under the CheXpert header, written as a
# path -> display-label mapping and flattened to (path, label) pairs in
# reporting order.
csv_files_with_labels = list({
    '/content/chexpert_gender_chexpert_test.csv': 'CheXpert Peds',
    '/content/chexpert_gender_mimic_test.csv': 'MIMIC Peds',
    '/content/chexpert_gender_nih_test.csv': 'NIH Peds',
    '/content/chexpert_gender_brax_test_full.csv': 'BRAX Peds',
    '/content/chexpert_gender_jsrt_test.csv': 'JSRT Peds',
    '/content/chexpert_gender_padchest_test_full.csv': 'PadChest Peds',
    '/content/chexpert_gender_shenzhen_test.csv': 'Shenzhen Peds',
    '/content/chexpert_gender_vindr_peds_test.csv': 'VinDr-PCXR',
}.items())

# One printed line (result, skip notice or error) per file.
for file_path, label in csv_files_with_labels:
    bootstrap_auc_for_csv(file_path, label)


CheXpert Peds - AUROC: 0.9887 [0.9821, 0.9936]
MIMIC Peds - AUROC: 0.9950 [0.9882, 0.9994]
NIH Peds - AUROC: 0.8598 [0.8418, 0.8774]
BRAX Peds - AUROC: 0.8646 [0.8485, 0.8803]
Skipping JSRT Peds: only 2 pediatric cases.


  predict_df = pd.read_csv(csv_file_path)


PadChest Peds - AUROC: 0.4972 [0.4818, 0.5122]
Skipping Shenzhen Peds: only 50 pediatric cases.
VinDr-PCXR - AUROC: 0.5892 [0.5767, 0.6006]


In [None]:
import pandas as pd
import ast
import numpy as np
from sklearn import metrics

def categorize_age(age):
    """Map an age in years to an ordinal age-group code.

    Groups (upper bound inclusive):
        0 -> (0, 20], 1 -> (20, 40], 2 -> (40, 60], 3 -> (60, 80], 4 -> above 80.

    The bins are contiguous, so fractional ages such as 20.5 or 80.5 land in
    the next group instead of falling through a gap between integer edges.
    Whole-number ages are categorized exactly as before.

    Parameters
    ----------
    age : object
        Anything ``float()`` accepts (number or numeric string).

    Returns
    -------
    int or None
        The group code, or ``None`` when ``age`` cannot be converted to a
        float, is NaN, or is not strictly positive.
    """
    try:
        age = float(age)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric, missing or out-of-range input: no category.
        return None

    # `not age > 0` (rather than `age <= 0`) also rejects NaN, for which every
    # comparison is False.
    if not age > 0:
        return None

    for code, upper_bound in enumerate((20, 40, 60, 80)):
        if age <= upper_bound:
            return code
    return 4

def bootstrap_auroc(y_true, y_score, n_bootstraps=1000, seed=42):
    """Estimate a 95% percentile-bootstrap confidence interval for the AUROC.

    Rows are resampled with replacement ``n_bootstraps`` times using a
    ``RandomState`` seeded with ``seed``. Resamples that contain a single
    class are skipped because ``roc_auc_score`` cannot score them.

    Parameters
    ----------
    y_true : array-like
        Binary ground-truth labels.
    y_score : array-like
        Scores for the positive class, in the same order as ``y_true``.
    n_bootstraps : int, default 1000
        Number of resamples to draw.
    seed : int, default 42
        Seed for the resampling generator.

    Returns
    -------
    tuple
        ``(lower, upper)``: the 2.5th and 97.5th percentiles of the bootstrap
        AUROCs. ``(nan, nan)`` is returned when no resample could be scored
        (empty input, or labels from a single class).
    """
    y_true = np.asarray(y_true)
    y_score = np.asarray(y_score)
    n_samples = len(y_true)

    # With no rows or only one class every resample would be skipped, so
    # answer directly instead of looping and then taking a percentile of
    # nothing.
    if n_samples == 0 or len(np.unique(y_true)) < 2:
        return np.nan, np.nan

    rng = np.random.RandomState(seed)
    bootstrapped_scores = []
    for _ in range(n_bootstraps):
        indices = rng.randint(0, n_samples, n_samples)
        if len(np.unique(y_true[indices])) < 2:
            continue  # AUROC is undefined when only one class was drawn
        bootstrapped_scores.append(
            metrics.roc_auc_score(y_true[indices], y_score[indices])
        )

    # Possible for tiny, heavily imbalanced inputs: every draw was degenerate.
    if not bootstrapped_scores:
        return np.nan, np.nan

    # np.percentile orders the data itself, so no explicit sort is needed.
    lower, upper = np.percentile(bootstrapped_scores, [2.5, 97.5])
    return lower, upper

def evaluate_peds_csv(csv_file_path, label_name):
    """Print the gender-classification AUROC for patients younger than 12.

    Reads a prediction CSV, keeps the rows whose age is below 12 and prints
    the AUROC together with the interval returned by ``bootstrap_auroc``.
    Nothing is returned; every outcome is reported with ``print``.

    Parameters
    ----------
    csv_file_path : str
        CSV with a ``Gender_Probability`` column of stringified sequences,
        an age column and a gender column (accepted spellings are listed in
        ``age_columns`` / ``gender_columns`` below). ``'VinDr-PCXR'`` files
        must also have a ``Path`` column.
    label_name : str
        Display name of the dataset. The exact value ``'VinDr-PCXR'`` turns
        on de-duplication by ``Path`` and parsing of ages written as
        ``'<int>Y'``; every other label gets numeric age parsing.

    Notes
    -----
    * Datasets with 50 or fewer remaining rows are skipped with a message.
    * Gender values ``'F'``/``'Female'`` map to 1 and ``'M'``/``'Male'`` to 0;
      rows with any other value are dropped.
    * ``FileNotFoundError``, ``KeyError`` and ``ValueError`` are caught and
      printed so one bad file does not stop the caller's loop.
    """
    age_columns = ['Age', 'PatientAge', 'age', 'patient_age', 'Patient Age']
    gender_columns = ['Gender', 'sex', 'Sex', 'PatientSex', 'gender']

    def is_under_12_years(age):
        # Only strings shaped like '<int>Y' are accepted; anything else
        # (another suffix, NaN, plain numbers) is rejected.
        # NOTE(review): if ages can also be given in months/days with a
        # different suffix, those rows are excluded here -- confirm intent.
        if isinstance(age, str) and age.endswith('Y'):
            try:
                return int(age[:-1]) < 12
            except ValueError:
                return False
        return False

    try:
        # low_memory=False infers each column's dtype from the whole file,
        # which avoids the mixed-type DtypeWarning on the larger CSVs.
        predict_df = pd.read_csv(csv_file_path, low_memory=False)
        predict_df['Gender_Probability'] = predict_df['Gender_Probability'].apply(ast.literal_eval)

        is_vindr = label_name == 'VinDr-PCXR'
        if is_vindr:
            predict_df = predict_df.drop_duplicates(subset=['Path'], keep='first')

        # Fail with an explicit message instead of a KeyError on a derived
        # column that was never created.
        age_col = next((col for col in age_columns if col in predict_df.columns), None)
        if age_col is None:
            raise KeyError("No age column found.")

        if is_vindr:
            in_cohort = predict_df[age_col].apply(is_under_12_years).astype(bool)
        else:
            # Unparseable ages become NaN, and NaN < 12 is False.
            in_cohort = pd.to_numeric(predict_df[age_col], errors='coerce') < 12
        predict_df = predict_df[in_cohort]

        if len(predict_df) <= 50:
            print(f"Skipping {label_name}: only {len(predict_df)} pediatric cases.")
            return

        # --- Gender label ---
        gender_col = next((col for col in gender_columns if col in predict_df.columns), None)
        if gender_col is None:
            raise KeyError("No gender column found.")

        y_true = predict_df[gender_col].map({'F': 1, 'Female': 1, 'M': 0, 'Male': 0})
        # The first probability is scored against the F=1 label.
        # NOTE(review): assumes index 0 is the female-class output -- confirm.
        y_score = np.array([prob[0] for prob in predict_df['Gender_Probability']])

        # Positional boolean mask so labels and scores stay aligned.
        mask = y_true.notna().to_numpy()
        y_true = y_true.to_numpy()[mask].astype(int)
        y_score = y_score[mask]

        auroc = metrics.roc_auc_score(y_true, y_score)
        lower_ci, upper_ci = bootstrap_auroc(y_true, y_score)
        print(f"{label_name} (n={len(y_true)}): AUROC: {auroc:.4f} [{lower_ci:.4f}, {upper_ci:.4f}]")

    except (FileNotFoundError, KeyError, ValueError) as e:
        print(f"Error processing {csv_file_path}: {e}")



# (prediction CSV path, display label) pairs, listed in the order they are
# evaluated and printed.
# The last label must stay exactly 'VinDr-PCXR': evaluate_peds_csv compares
# against that string to select its VinDr-specific age handling.
csv_files_with_labels = [
    ('/content/chexpert_gender_chexpert_test.csv', 'CheXpert Peds'),
    ('/content/chexpert_gender_mimic_test.csv', 'MIMIC Peds'),
    ('/content/chexpert_gender_nih_test.csv', 'NIH Peds'),
    ('/content/chexpert_gender_brax_test_full.csv', 'BRAX Peds'),
    ('/content/chexpert_gender_jsrt_test.csv', 'JSRT Peds'),
    ('/content/chexpert_gender_padchest_test_full.csv', 'PadChest Peds'),
    ('/content/chexpert_gender_shenzhen_test.csv', 'Shenzhen Peds'),
    ('/content/chexpert_gender_vindr_peds_test.csv', 'VinDr-PCXR')
]
# One call per dataset; each call prints an AUROC line, a "Skipping ..." line
# or an "Error processing ..." line.
for file_path, label in csv_files_with_labels:
    evaluate_peds_csv(file_path, label)

Skipping CheXpert Peds: only 0 pediatric cases.
Skipping MIMIC Peds: only 0 pediatric cases.
NIH Peds (n=394): AUROC: 0.5081 [0.4507, 0.5661]
BRAX Peds (n=2491): AUROC: 0.6003 [0.5785, 0.6213]
Skipping JSRT Peds: only 0 pediatric cases.


  predict_df = pd.read_csv(csv_file_path)


PadChest Peds (n=4288): AUROC: 0.4850 [0.4671, 0.5039]
Skipping Shenzhen Peds: only 21 pediatric cases.
VinDr-PCXR (n=5295): AUROC: 0.6478 [0.6326, 0.6624]


In [None]:
import pandas as pd
import ast
import numpy as np
from sklearn import metrics

def categorize_age(age):
    """Map an age in years to an ordinal age-group code.

    Groups (upper bound inclusive):
        0 -> (0, 20], 1 -> (20, 40], 2 -> (40, 60], 3 -> (60, 80], 4 -> above 80.

    The bins are contiguous, so fractional ages such as 20.5 or 80.5 land in
    the next group instead of falling through a gap between integer edges.
    Whole-number ages are categorized exactly as before.

    Parameters
    ----------
    age : object
        Anything ``float()`` accepts (number or numeric string).

    Returns
    -------
    int or None
        The group code, or ``None`` when ``age`` cannot be converted to a
        float, is NaN, or is not strictly positive.
    """
    try:
        age = float(age)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric, missing or out-of-range input: no category.
        return None

    # `not age > 0` (rather than `age <= 0`) also rejects NaN, for which every
    # comparison is False.
    if not age > 0:
        return None

    for code, upper_bound in enumerate((20, 40, 60, 80)):
        if age <= upper_bound:
            return code
    return 4

def bootstrap_auroc(y_true, y_score, n_bootstraps=1000, seed=42):
    """Estimate a 95% percentile-bootstrap confidence interval for the AUROC.

    Rows are resampled with replacement ``n_bootstraps`` times using a
    ``RandomState`` seeded with ``seed``. Resamples that contain a single
    class are skipped because ``roc_auc_score`` cannot score them.

    Parameters
    ----------
    y_true : array-like
        Binary ground-truth labels.
    y_score : array-like
        Scores for the positive class, in the same order as ``y_true``.
    n_bootstraps : int, default 1000
        Number of resamples to draw.
    seed : int, default 42
        Seed for the resampling generator.

    Returns
    -------
    tuple
        ``(lower, upper)``: the 2.5th and 97.5th percentiles of the bootstrap
        AUROCs. ``(nan, nan)`` is returned when no resample could be scored
        (empty input, or labels from a single class).
    """
    y_true = np.asarray(y_true)
    y_score = np.asarray(y_score)
    n_samples = len(y_true)

    # With no rows or only one class every resample would be skipped, so
    # answer directly instead of looping and then taking a percentile of
    # nothing.
    if n_samples == 0 or len(np.unique(y_true)) < 2:
        return np.nan, np.nan

    rng = np.random.RandomState(seed)
    bootstrapped_scores = []
    for _ in range(n_bootstraps):
        indices = rng.randint(0, n_samples, n_samples)
        if len(np.unique(y_true[indices])) < 2:
            continue  # AUROC is undefined when only one class was drawn
        bootstrapped_scores.append(
            metrics.roc_auc_score(y_true[indices], y_score[indices])
        )

    # Possible for tiny, heavily imbalanced inputs: every draw was degenerate.
    if not bootstrapped_scores:
        return np.nan, np.nan

    # np.percentile orders the data itself, so no explicit sort is needed.
    lower, upper = np.percentile(bootstrapped_scores, [2.5, 97.5])
    return lower, upper

def evaluate_peds_csv(csv_file_path, label_name):
    """Print the gender-classification AUROC for patients aged 12 and above
    within the pediatric group.

    Reads a prediction CSV, selects the cohort described below and prints the
    AUROC together with the interval returned by ``bootstrap_auroc``.
    Nothing is returned; every outcome is reported with ``print``.

    Cohort
    ------
    * ``label_name == 'VinDr-PCXR'``: rows de-duplicated by ``Path`` whose age
      is a string ``'<int>Y'`` with a value of at least 12 (no upper bound is
      applied in this branch).
    * any other label: rows whose numeric age is at least 12 and for which
      ``categorize_age`` returns 0, i.e. ages 12 through 20.

    Parameters
    ----------
    csv_file_path : str
        CSV with a ``Gender_Probability`` column of stringified sequences,
        an age column and a gender column (accepted spellings are listed in
        ``age_columns`` / ``gender_columns`` below). ``'VinDr-PCXR'`` files
        must also have a ``Path`` column.
    label_name : str
        Display name of the dataset; also selects the branch described above.

    Notes
    -----
    * Datasets with 50 or fewer remaining rows are skipped with a message.
    * Gender values ``'F'``/``'Female'`` map to 1 and ``'M'``/``'Male'`` to 0;
      rows with any other value are dropped.
    * ``FileNotFoundError``, ``KeyError`` and ``ValueError`` are caught and
      printed so one bad file does not stop the caller's loop.
    """
    age_columns = ['Age', 'PatientAge', 'age', 'patient_age', 'Patient Age']
    gender_columns = ['Gender', 'sex', 'Sex', 'PatientSex', 'gender']

    def is_at_least_12_years(age):
        # Only strings shaped like '<int>Y' are accepted; anything else
        # (another suffix, NaN, plain numbers) is rejected.
        if isinstance(age, str) and age.endswith('Y'):
            try:
                return int(age[:-1]) >= 12
            except ValueError:
                return False
        return False

    try:
        # low_memory=False infers each column's dtype from the whole file,
        # which avoids the mixed-type DtypeWarning on the larger CSVs.
        predict_df = pd.read_csv(csv_file_path, low_memory=False)
        predict_df['Gender_Probability'] = predict_df['Gender_Probability'].apply(ast.literal_eval)

        is_vindr = label_name == 'VinDr-PCXR'
        if is_vindr:
            predict_df = predict_df.drop_duplicates(subset=['Path'], keep='first')

        # --- Handle Age Column ---
        # Fail with an explicit message instead of a KeyError on a derived
        # column that was never created.
        age_col = next((col for col in age_columns if col in predict_df.columns), None)
        if age_col is None:
            raise KeyError("No age column found.")

        if is_vindr:
            in_cohort = predict_df[age_col].apply(is_at_least_12_years).astype(bool)
        else:
            # Unparseable ages become NaN and fail both conditions.
            ages = pd.to_numeric(predict_df[age_col], errors='coerce')
            # Category 0 is the youngest group of categorize_age, so the two
            # conditions together keep ages 12 through 20.
            in_cohort = (ages >= 12) & (ages.apply(categorize_age) == 0)
        predict_df = predict_df[in_cohort]

        if len(predict_df) <= 50:
            print(f"Skipping {label_name}: only {len(predict_df)} pediatric cases.")
            return

        # --- Gender label ---
        gender_col = next((col for col in gender_columns if col in predict_df.columns), None)
        if gender_col is None:
            raise KeyError("No gender column found.")

        y_true = predict_df[gender_col].map({'F': 1, 'Female': 1, 'M': 0, 'Male': 0})
        # The first probability is scored against the F=1 label.
        # NOTE(review): assumes index 0 is the female-class output -- confirm.
        y_score = np.array([prob[0] for prob in predict_df['Gender_Probability']])

        # Positional boolean mask so labels and scores stay aligned.
        mask = y_true.notna().to_numpy()
        y_true = y_true.to_numpy()[mask].astype(int)
        y_score = y_score[mask]

        auroc = metrics.roc_auc_score(y_true, y_score)
        lower_ci, upper_ci = bootstrap_auroc(y_true, y_score)
        print(f"{label_name} (n={len(y_true)}): AUROC: {auroc:.4f} [{lower_ci:.4f}, {upper_ci:.4f}]")

    except (FileNotFoundError, KeyError, ValueError) as e:
        print(f"Error processing {csv_file_path}: {e}")



# (prediction CSV path, display label) pairs, listed in the order they are
# evaluated and printed.
# The last label must stay exactly 'VinDr-PCXR': evaluate_peds_csv compares
# against that string to select its VinDr-specific age handling.
csv_files_with_labels = [
    ('/content/chexpert_gender_chexpert_test.csv', 'CheXpert Peds'),
    ('/content/chexpert_gender_mimic_test.csv', 'MIMIC Peds'),
    ('/content/chexpert_gender_nih_test.csv', 'NIH Peds'),
    ('/content/chexpert_gender_brax_test_full.csv', 'BRAX Peds'),
    ('/content/chexpert_gender_jsrt_test.csv', 'JSRT Peds'),
    ('/content/chexpert_gender_padchest_test_full.csv', 'PadChest Peds'),
    ('/content/chexpert_gender_shenzhen_test.csv', 'Shenzhen Peds'),
    ('/content/chexpert_gender_vindr_peds_test.csv', 'VinDr-PCXR')
]
# One call per dataset; each call prints an AUROC line, a "Skipping ..." line
# or an "Error processing ..." line.
for file_path, label in csv_files_with_labels:
    evaluate_peds_csv(file_path, label)

CheXpert Peds (n=541): AUROC: 0.9887 [0.9825, 0.9940]
MIMIC Peds (n=215): AUROC: 0.9950 [0.9883, 0.9995]
NIH Peds (n=1135): AUROC: 0.9393 [0.9252, 0.9523]
BRAX Peds (n=1065): AUROC: 0.9860 [0.9797, 0.9912]
Skipping JSRT Peds: only 2 pediatric cases.


  predict_df = pd.read_csv(csv_file_path)


PadChest Peds (n=2190): AUROC: 0.5182 [0.4929, 0.5425]
Skipping Shenzhen Peds: only 29 pediatric cases.
hi
VinDr-PCXR (n=63): AUROC: 0.9556 [0.9033, 0.9907]
