In [1]:
import pandas as pd
from collections import namedtuple

In [2]:
# ``Group`` identifies one experiment configuration; its field names double as
# the column list used for every ``groupby`` in this notebook.
Group = namedtuple("Group", ["dataset", "model_name", "ssl", "harmonize"])
gcols = list(Group._fields)


def flatten_biomarkers(biomarkers, labels):
    """Flatten the strict upper triangle of a pairwise matrix into a dict.

    Every index pair ``row < col`` contributes one entry whose key is
    ``"<labels[col]> - <labels[row]>"`` and whose value is
    ``biomarkers[row][col]``. Pairs are visited row by row, so the returned
    dict is ordered by increasing ``row`` and, within a row, increasing ``col``.
    """
    n_labels = len(labels)
    return {
        "{} - {}".format(labels[col], labels[row]): biomarkers[row][col]
        for row in range(n_labels)
        for col in range(row + 1, n_labels)
    }


def run_analysis(df, thres=5.0):
    """Summarise, per biomarker, on which sites its score exceeds ``thres``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per site. The columns read here are ``"biomarkers"`` and
        ``"module_labels"`` (both forwarded to ``flatten_biomarkers``) and
        ``"labeled_sites"`` (used as the row label of each site).
    thres : float, default 5.0
        A biomarker counts as present on a site when its score is strictly
        greater than this value.

    Returns
    -------
    pandas.DataFrame
        Indexed by biomarker name, with the columns ``site_present_rate``
        (number of sites above the threshold; a count, despite the name),
        ``saliency_score`` (sum of the scores over all sites),
        ``sites_present`` and ``sites_absent`` (lists of site labels).
        Rows are sorted by ``site_present_rate`` and then ``saliency_score``,
        both descending.
    """
    flattened = [
        flatten_biomarkers(matrix, labels)
        for matrix, labels in zip(df["biomarkers"], df["module_labels"])
    ]
    scores = pd.DataFrame(flattened, index=df["labeled_sites"])

    important = scores > thres

    # Build the per-biomarker site lists explicitly as object Series.
    # ``DataFrame.apply(..., axis=0)`` with a list-returning function can
    # expand the result into a DataFrame when every list has the same length
    # (for example when no biomarker exceeds the threshold), which would break
    # the four-column layout assembled below.
    present = pd.Series(
        [flags[flags].index.tolist() for _, flags in important.items()],
        index=important.columns,
        dtype=object,
    )
    absent = pd.Series(
        [flags[~flags].index.tolist() for _, flags in important.items()],
        index=important.columns,
        dtype=object,
    )
    site_present_rate = important.sum(axis=0)
    saliency_score = scores.sum(axis=0)

    summary = pd.concat(
        [site_present_rate, saliency_score, present, absent], axis=1
    )
    summary.columns = [
        "site_present_rate",
        "saliency_score",
        "sites_present",
        "sites_absent",
    ]
    return summary.sort_values(
        by=["site_present_rate", "saliency_score"], ascending=False
    )


def site_based_analysis(df, thres=5.0):
    """Run ``run_analysis`` once per experiment configuration.

    ``df`` is grouped by the ``gcols`` columns; each group's rows are passed
    to ``run_analysis`` together with ``thres``. The result maps the
    ``Group`` record built from the group key to that group's summary.
    """
    return {
        Group(*key): run_analysis(subset, thres)
        for key, subset in df.groupby(gcols)
    }


def whole_analysis(df):
    """Build one descending-sorted biomarker Series per configuration.

    ``df`` is grouped by the ``gcols`` columns and every group must hold
    exactly one row (enforced with an ``assert``). That row's ``biomarkers``
    and ``module_labels`` are flattened with ``flatten_biomarkers`` and the
    resulting values are sorted from largest to smallest. The result maps
    each ``Group`` record to its Series.
    """
    results = {}
    for key, rows in df.groupby(gcols):
        assert len(rows) == 1
        matrix = rows["biomarkers"].iloc[0]
        labels = rows["module_labels"].iloc[0]
        flat = pd.Series(flatten_biomarkers(matrix, labels))
        results[Group(*key)] = flat.sort_values(ascending=False)
    return results


# ABIDE

### Whole Analysis

In [3]:
# Load the biomarker table stored under the ABIDE_WHOLE archive and display it.
ABIDE_WHOLE_df = pd.read_parquet("../../.archive/ABIDE_WHOLE/biomarkers/biomarkers.parquet")
ABIDE_WHOLE_df

Unnamed: 0,dataset,labeled_sites,unlabeled_sites,model_name,ssl,harmonize,index,biomarkers,module_labels
0,ABIDE,"['NYU', 'CALTECH', 'LEUVEN_1', 'LEUVEN_2', 'MA...",,VAE-FFN,False,False,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[[2.0012451249707914, 1.767965968021977, 2.065...","[Mid Frontal G, Mid Temporal G, Cingulate G, M..."
1,ABIDE,"['NYU', 'CALTECH', 'LEUVEN_1', 'LEUVEN_2', 'MA...",,VAE-FFN,False,True,"[50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 6...","[[2.1304509544432633, 1.848727021172302, 2.195...","[Mid Frontal G, Mid Temporal G, Cingulate G, M..."
2,ABIDE,"['NYU', 'CALTECH', 'LEUVEN_1', 'LEUVEN_2', 'MA...",,VAECH-I,False,False,"[100, 101, 102, 103, 104, 105, 106, 107, 108, ...","[[1.8290581169377582, 1.7875616762626336, 2.03...","[Mid Frontal G, Mid Temporal G, Cingulate G, M..."
3,ABIDE,"['NYU', 'CALTECH', 'LEUVEN_1', 'LEUVEN_2', 'MA...",,VAECH-II,False,False,"[150, 151, 152, 153, 154, 155, 156, 157, 158, ...","[[1.984155062745592, 1.718247494814484, 2.0179...","[Mid Frontal G, Mid Temporal G, Cingulate G, M..."
4,ABIDE,"['NYU', 'CALTECH', 'LEUVEN_1', 'LEUVEN_2', 'MA...",,VAESDR,False,False,"[200, 201, 202, 203, 204, 205, 206, 207, 208, ...","[[2.127852312679372, 1.7399579119675435, 2.094...","[Mid Frontal G, Mid Temporal G, Cingulate G, M..."


In [4]:
# Map each (dataset, model_name, ssl, harmonize) group to its sorted saliency Series.
ABIDE_WHOLE_analysis_results = whole_analysis(ABIDE_WHOLE_df)

In [5]:
# Entries of the ABIDE / VAE-FFN (ssl=False, harmonize=False) series above 5.0.
saliency = ABIDE_WHOLE_analysis_results[
    Group(dataset="ABIDE", model_name="VAE-FFN", ssl=False, harmonize=False)
]
saliency.loc[saliency.gt(5.0)]

Trans. Temporal G - Paracentral Lobule    11.650227
Subcallosal G - Sub-Gyral                  8.191764
Paracentral Lobule - Postcentral G         7.080360
Subcallosal G - Culmen                     5.663197
Thalamus - Sup. Temporal G                 5.503090
Sup. Occipital G - Sup. Temporal G         5.142085
dtype: float64

In [6]:
# Entries of the ABIDE / VAECH-II (ssl=False, harmonize=False) series above 5.0.
saliency = ABIDE_WHOLE_analysis_results[
    Group(dataset="ABIDE", model_name="VAECH-II", ssl=False, harmonize=False)
]
saliency.loc[saliency.gt(5.0)]

Trans. Temporal G - Paracentral Lobule    12.195952
Subcallosal G - Sub-Gyral                  7.692629
Paracentral Lobule - Postcentral G         7.456307
Sup. Occipital G - Inf. Occipital G        6.425942
Sup. Occipital G - Mid Occipital G         5.576159
Sup. Occipital G - Sup. Temporal G         5.163124
dtype: float64

### Site Analysis

In [7]:
# Load the ABIDE_INDIVIDUAL biomarker table without its "unlabeled_sites" column.
ABIDE_INDIVIDUAL_df = pd.read_parquet(
    "../../.archive/ABIDE_INDIVIDUAL/biomarkers/biomarkers.parquet"
).drop(columns=["unlabeled_sites"])

In [8]:
# Per-group site-level summary tables, using site_based_analysis' default threshold (5.0).
ABIDE_INDIVIDUAL_analysis_results = site_based_analysis(ABIDE_INDIVIDUAL_df)

In [9]:
# Display the site-level summary for ABIDE / VAE-FFN with ssl=False, harmonize=False.
ABIDE_INDIVIDUAL_analysis_results[Group(dataset="ABIDE", model_name="VAE-FFN", ssl=False, harmonize=False)]

Unnamed: 0,site_present_rate,saliency_score,sites_present,sites_absent
Paracentral Lobule - Postcentral G,12,109.730260,"[CALTECH, LEUVEN_1, LEUVEN_2, NYU, OHSU, OLIN,...","[MAX_MUN, STANFORD, UCLA_2, UM_2]"
Trans. Temporal G - Paracentral Lobule,11,115.153479,"[CALTECH, MAX_MUN, NYU, OLIN, PITT, STANFORD, ...","[LEUVEN_1, LEUVEN_2, OHSU, USM, YALE]"
Caudate - Paracentral Lobule,7,84.394919,"[MAX_MUN, NYU, OLIN, PITT, STANFORD, UCLA_1, USM]","[CALTECH, LEUVEN_1, LEUVEN_2, OHSU, TRINITY, U..."
Sup. Occipital G - Mid Occipital G,6,76.382770,"[LEUVEN_1, LEUVEN_2, STANFORD, TRINITY, UCLA_2...","[CALTECH, MAX_MUN, NYU, OHSU, OLIN, PITT, UCLA..."
Subcallosal G - Culmen,6,58.210090,"[NYU, STANFORD, UCLA_2, UM_2, USM, YALE]","[CALTECH, LEUVEN_1, LEUVEN_2, MAX_MUN, OHSU, O..."
...,...,...,...,...
Trans. Temporal G - Inf. Temporal G,0,8.309094,[],"[CALTECH, LEUVEN_1, LEUVEN_2, MAX_MUN, NYU, OH..."
Sup. Occipital G - Paracentral Lobule,0,8.305750,[],"[CALTECH, LEUVEN_1, LEUVEN_2, MAX_MUN, NYU, OH..."
Subcallosal G - Inf. Occipital G,0,8.119433,[],"[CALTECH, LEUVEN_1, LEUVEN_2, MAX_MUN, NYU, OH..."
Sup. Occipital G - Caudate,0,6.011878,[],"[CALTECH, LEUVEN_1, LEUVEN_2, MAX_MUN, NYU, OH..."


In [10]:
# Display the site-level summary for ABIDE / VAECH-II with ssl=True, harmonize=False.
ABIDE_INDIVIDUAL_analysis_results[Group(dataset="ABIDE", model_name="VAECH-II", ssl=True, harmonize=False)]

Unnamed: 0,site_present_rate,saliency_score,sites_present,sites_absent
Paracentral Lobule - Postcentral G,15,110.806131,"[CALTECH, LEUVEN_1, LEUVEN_2, NYU, OHSU, OLIN,...",[MAX_MUN]
Trans. Temporal G - Paracentral Lobule,14,121.011266,"[CALTECH, LEUVEN_1, LEUVEN_2, MAX_MUN, NYU, OL...","[OHSU, USM]"
Caudate - Paracentral Lobule,8,82.005140,"[MAX_MUN, NYU, OLIN, PITT, STANFORD, UCLA_1, U...","[CALTECH, LEUVEN_1, LEUVEN_2, OHSU, TRINITY, U..."
Sup. Occipital G - Mid Occipital G,6,82.102394,"[LEUVEN_1, LEUVEN_2, STANFORD, UCLA_1, UCLA_2,...","[CALTECH, MAX_MUN, NYU, OHSU, OLIN, PITT, TRIN..."
Trans. Temporal G - Caudate,6,74.830989,"[LEUVEN_1, OHSU, UM_1, UM_2, USM, YALE]","[CALTECH, LEUVEN_2, MAX_MUN, NYU, OLIN, PITT, ..."
...,...,...,...,...
Sup. Occipital G - Thalamus,0,10.754016,[],"[CALTECH, LEUVEN_1, LEUVEN_2, MAX_MUN, NYU, OH..."
Sup. Occipital G - Caudate,0,10.080713,[],"[CALTECH, LEUVEN_1, LEUVEN_2, MAX_MUN, NYU, OH..."
Trans. Temporal G - Inf. Temporal G,0,10.076910,[],"[CALTECH, LEUVEN_1, LEUVEN_2, MAX_MUN, NYU, OH..."
Subcallosal G - Inf. Occipital G,0,9.434391,[],"[CALTECH, LEUVEN_1, LEUVEN_2, MAX_MUN, NYU, OH..."


# ADHD

### Whole Analysis

In [11]:
# Load the biomarker table stored under the ADHD_WHOLE archive and display it.
ADHD_WHOLE_df = pd.read_parquet("../../.archive/ADHD_WHOLE/biomarkers/biomarkers.parquet")
ADHD_WHOLE_df

Unnamed: 0,dataset,labeled_sites,unlabeled_sites,model_name,ssl,harmonize,index,biomarkers,module_labels
0,ADHD,"['NYU', 'PKU', 'NI', 'OHSU']",,VAE-FFN,False,False,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[[2.2791221315896273, 2.222043339287873, 2.165...","[Mid Frontal G, Mid Temporal G, Cingulate G, M..."
1,ADHD,"['NYU', 'PKU', 'NI', 'OHSU']",,VAE-FFN,False,True,"[50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 6...","[[2.2677217094634687, 2.522793711511227, 2.226...","[Mid Frontal G, Mid Temporal G, Cingulate G, M..."
2,ADHD,"['NYU', 'PKU', 'NI', 'OHSU']",,VAECH-I,False,False,"[100, 101, 102, 103, 104, 105, 106, 107, 108, ...","[[2.6688978077286793, 2.4778605232214392, 2.42...","[Mid Frontal G, Mid Temporal G, Cingulate G, M..."
3,ADHD,"['NYU', 'PKU', 'NI', 'OHSU']",,VAECH-II,False,False,"[150, 151, 152, 153, 154, 155, 156, 157, 158, ...","[[2.5880877089279553, 2.524906417078577, 2.390...","[Mid Frontal G, Mid Temporal G, Cingulate G, M..."
4,ADHD,"['NYU', 'PKU', 'NI', 'OHSU']",,VAESDR,False,False,"[200, 201, 202, 203, 204, 205, 206, 207, 208, ...","[[2.5072522507126225, 2.2253698802003252, 2.29...","[Mid Frontal G, Mid Temporal G, Cingulate G, M..."


In [12]:
# Map each (dataset, model_name, ssl, harmonize) group to its sorted saliency Series.
ADHD_WHOLE_analysis_results = whole_analysis(ADHD_WHOLE_df)

In [13]:
# Entries of the ADHD / VAE-FFN (ssl=False, harmonize=False) series above 5.0.
saliency = ADHD_WHOLE_analysis_results[
    Group(dataset="ADHD", model_name="VAE-FFN", ssl=False, harmonize=False)
]
saliency.loc[saliency.gt(5.0)]

Posterior Cingulate - Thalamus            7.835226
Uncus - Inf. Frontal G                    7.523813
Uncus - Lentiform Nucleus                 7.434816
Posterior Cingulate - Mid Occipital G     6.561900
Sub-Gyral - Anterior Cingulate            6.364107
Caudate - Inf. Temporal G                 5.973106
Anterior Cingulate - Mid Frontal G        5.729676
Trans. Temporal G - Paracentral Lobule    5.441744
Supramarginal G - Inf. Frontal G          5.280399
Inf. Temporal G - Posterior Cingulate     5.192220
dtype: float64

In [14]:
# Entries of the ADHD / VAECH-II (ssl=False, harmonize=False) series above 5.0.
saliency = ADHD_WHOLE_analysis_results[
    Group(dataset="ADHD", model_name="VAECH-II", ssl=False, harmonize=False)
]
saliency.loc[saliency.gt(5.0)]

Sub-Gyral - Anterior Cingulate            8.176217
Uncus - Lentiform Nucleus                 7.446866
Trans. Temporal G - Extra-Nuclear         6.488384
Inf. Temporal G - Posterior Cingulate     6.452964
Posterior Cingulate - Thalamus            6.429725
Posterior Cingulate - Mid Occipital G     6.254722
Anterior Cingulate - Mid Frontal G        5.926423
Supramarginal G - Inf. Frontal G          5.404541
Trans. Temporal G - Paracentral Lobule    5.170383
dtype: float64

### Site Analysis

In [15]:
# Load the ADHD_INDIVIDUAL biomarker table without its "unlabeled_sites" column.
ADHD_INDIVIDUAL_df = pd.read_parquet(
    "../../.archive/ADHD_INDIVIDUAL/biomarkers/biomarkers.parquet"
).drop(columns=["unlabeled_sites"])

In [16]:
# Per-group site-level summary tables, using site_based_analysis' default threshold (5.0).
ADHD_INDIVIDUAL_analysis_results = site_based_analysis(ADHD_INDIVIDUAL_df)

In [17]:
# Display the site-level summary for ADHD / VAE-FFN with ssl=False, harmonize=False.
ADHD_INDIVIDUAL_analysis_results[Group(dataset="ADHD", model_name="VAE-FFN", ssl=False, harmonize=False)]

Unnamed: 0,site_present_rate,saliency_score,sites_present,sites_absent
Sup. Occipital G - Inf. Occipital G,3,25.717534,"[NYU, OHSU, PKU]",[NI]
Uncus - Lentiform Nucleus,3,23.455905,"[NI, OHSU, PKU]",[NYU]
Anterior Cingulate - Mid Frontal G,3,21.129717,"[NYU, OHSU, PKU]",[NI]
Posterior Cingulate - Thalamus,3,19.815669,"[NI, NYU, PKU]",[OHSU]
Trans. Temporal G - Thalamus,2,25.970531,"[NYU, OHSU]","[NI, PKU]"
...,...,...,...,...
Uncus - Sup. Parietal Lobule,0,2.155725,[],"[NI, NYU, OHSU, PKU]"
Sup. Occipital G - Uncus,0,2.080899,[],"[NI, NYU, OHSU, PKU]"
Trans. Temporal G - Supramarginal G,0,1.470267,[],"[NI, NYU, OHSU, PKU]"
Uncus - Inf. Occipital G,0,1.452822,[],"[NI, NYU, OHSU, PKU]"


In [18]:
# Display the site-level summary for ADHD / VAECH-II with ssl=True, harmonize=False.
ADHD_INDIVIDUAL_analysis_results[Group(dataset="ADHD", model_name="VAECH-II", ssl=True, harmonize=False)]

Unnamed: 0,site_present_rate,saliency_score,sites_present,sites_absent
Trans. Temporal G - Thalamus,3,29.329817,"[NI, NYU, OHSU]",[PKU]
Uncus - Lentiform Nucleus,3,23.753205,"[NI, OHSU, PKU]",[NYU]
Sup. Occipital G - Inf. Occipital G,3,21.430314,"[NYU, OHSU, PKU]",[NI]
Sub-Gyral - Anterior Cingulate,3,21.170005,"[NI, OHSU, PKU]",[NYU]
Uncus - Inf. Frontal G,2,20.763588,"[NI, PKU]","[NYU, OHSU]"
...,...,...,...,...
Uncus - Sup. Parietal Lobule,0,2.646379,[],"[NI, NYU, OHSU, PKU]"
Uncus - Sub-Gyral,0,2.566098,[],"[NI, NYU, OHSU, PKU]"
Sup. Occipital G - Caudate,0,2.206993,[],"[NI, NYU, OHSU, PKU]"
Trans. Temporal G - Uncus,0,1.586603,[],"[NI, NYU, OHSU, PKU]"
