# IEEE

In [12]:
import pandas as pd
import pingouin as pg
import os
from pathlib import Path
from tqdm import tqdm

# suppress warnings
import warnings

warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", message=".*column_view.*")

# When True, anondir() masks the local checkout location in printed paths.
anonymizer = True

# Every input and output path in this notebook is built from the working directory.
root_dir = Path.cwd()


def anondir(path: Path, prefix=root_dir) -> Path:
    """Return ``path`` with its leading ``prefix`` replaced by ``<living-park>``.

    The match is done on path components, not on raw substrings, so a
    directory that merely shares a textual prefix with ``prefix`` (for example
    ``<prefix>-old``) or that contains ``prefix`` somewhere in the middle is
    returned unchanged instead of being partially rewritten.

    Parameters
    ----------
    path : Path
        Path to anonymize.
    prefix : Path, optional
        Directory to mask. Defaults to the value ``root_dir`` had when this
        function was defined.

    Returns
    -------
    Path
        ``<living-park>/<path relative to prefix>`` when ``path`` is located
        at or below ``prefix``; otherwise ``path`` itself. When the
        module-level ``anonymizer`` flag is false, ``path`` is returned as is.
    """
    if not anonymizer:
        return path
    try:
        relative = Path(path).relative_to(prefix)
    except ValueError:
        # path is not inside prefix: leave it untouched rather than rewriting
        # an accidental substring match.
        return Path(path)
    # Joining with "." (path == prefix) yields plain "<living-park>".
    return Path("<living-park>") / relative


# Report where the notebook runs, with the checkout location masked by anondir().
display(f"Running in root dir: {anondir(root_dir)}")

# Input statistics directory: only reported here, never created.
stats_dir = root_dir.joinpath("stats_QCed", "sampled")
print(f"Stats directory: {anondir(stats_dir)}")

# The ANCOVA result CSVs of this notebook are written under ancova_ieee/.
output_dir = root_dir.joinpath("ancova_ieee")
output_dir.mkdir(exist_ok=True, parents=True)
print(f"Output directory: {anondir(output_dir)}")

'Running in root dir: <living-park>'

Stats directory: <living-park>/stats_QCed/sampled
Output directory: <living-park>/ancova_ieee


In [13]:
def get_cohort_stats():
    """Load the QC'ed longitudinal cohort table and keep the analysis columns.

    Reads ``cohort/longitudinal_cohort_qced.csv`` below ``root_dir``, renames
    the ``NP3TOT`` column to ``UPDRS`` and prints the number of unique
    ``PATNO`` values for the ``PD-non-MCI`` and ``HC`` groups and overall.
    The file location is printed through ``anondir`` so the absolute local
    path is not written into the notebook output.

    Returns
    -------
    pandas.DataFrame
        The columns ``PATNO``, ``first_visit``, ``second_visit``,
        ``dx_group``, ``SEX``, ``AGE_AT_VISIT`` and ``UPDRS``.

    Raises
    ------
    KeyError
        If one of the columns above is missing after the rename.
    """
    filename = root_dir / "cohort" / "longitudinal_cohort_qced.csv"
    # Printed before reading so a failed read still shows which file was tried.
    print(f"Load cohort stats: {anondir(filename)}")

    # rename() returns a new frame; no in-place mutation of the loaded table.
    cohort = pd.read_csv(filename).rename(columns={"NP3TOT": "UPDRS"})
    columns = [
        "PATNO",
        "first_visit",
        "second_visit",
        "dx_group",
        "SEX",
        "AGE_AT_VISIT",
        "UPDRS",
    ]

    # Unique subjects per diagnostic group; a group that is absent counts as 0.
    subjects_per_group = cohort.groupby("dx_group")["PATNO"].nunique()
    print(
        f"Number of PD-non-MCI subjects: {subjects_per_group.get('PD-non-MCI', 0)}"
    )
    print(f"Number of HC subjects: {subjects_per_group.get('HC', 0)}")
    print(f"Total number of subjects: {cohort['PATNO'].nunique()}")
    return cohort[columns]


# Cohort table shared by the ANCOVA cells below (merged on "first_visit").
df_clinical = get_cohort_stats()

Load cohort stats: /mnt/lustre/ychatel/living-park/VIP-python-client/example/freesurfer-fuzzy/cohort/longitudinal_cohort_qced.csv
Number of PD-non-MCI subjects: 112
Number of HC subjects: 89
Total number of subjects: 201


# ANCOVA

## Cortical

In [14]:
def read_table(hemi, measure):
    """Load one hemisphere's ``aparc`` table for ``measure`` with cleaned headers.

    The tab-separated file ``table_ieee/{hemi}.aparc.{measure}.tsv`` below
    ``root_dir`` is read, a constant ``hemi`` column is added, and every
    column name has the substrings ``"{hemi}."``, ``"{hemi}_"`` and
    ``"_{measure}"`` removed (in that order, wherever they occur). The
    resulting ``aparc.{measure}`` column is renamed to ``first_visit``.

    Parameters
    ----------
    hemi : str
        Hemisphere tag used in the file name and column prefixes
        (the notebook passes ``"lh"`` and ``"rh"``).
    measure : str
        Measure tag used in the file name and column suffixes.

    Returns
    -------
    pandas.DataFrame
        One row per table row, with ``first_visit``, the cleaned measurement
        columns and ``hemi``.
    """
    table_path = root_dir / "table_ieee" / f"{hemi}.aparc.{measure}.tsv"
    table = pd.read_csv(table_path, sep="\t")
    table["hemi"] = hemi

    def _clean(column):
        # The three substitutions are applied in sequence to each header.
        for token in (f"{hemi}.", f"{hemi}_", f"_{measure}"):
            column = column.replace(token, "")
        return column

    table.columns = [_clean(name) for name in table.columns]
    return table.rename(columns={f"aparc.{measure}": "first_visit"})


def read_measure(measure):
    """Stack the left- and right-hemisphere tables of ``measure`` row-wise.

    Each hemisphere is loaded with ``read_table`` (which tags rows with a
    ``hemi`` column); the left table comes first in the result.
    """
    hemisphere_tables = [read_table(side, measure) for side in ("lh", "rh")]
    return pd.concat(hemisphere_tables, axis=0)


def get_baseline_ancova(metric):
    """Build the long-format input table for the ANCOVA of ``metric``.

    The wide per-hemisphere tables are melted so that each row holds one
    (``first_visit``, ``hemi``, ``region``) measurement, then joined with the
    module-level ``df_clinical`` table on ``first_visit`` (default inner
    join, so rows without a matching clinical entry are dropped).

    Returns
    -------
    pandas.DataFrame
        Columns ``first_visit``, ``hemi``, ``region``, ``<metric>``,
        ``dx_group`` and ``AGE_AT_VISIT``.
    """
    kept_columns = [
        "first_visit",
        "hemi",
        "region",
        metric,
        "dx_group",
        "AGE_AT_VISIT",
    ]
    long_table = read_measure(metric).melt(
        id_vars=["first_visit", "hemi"],
        var_name="region",
        value_name=metric,
    )
    with_clinical = long_table.merge(df_clinical, on="first_visit")
    return with_clinical[kept_columns]

In [15]:
import pingouin as pg


def compute_ancova(measure, clinical_df, force):
    """Run one ANCOVA per (hemisphere, region) for a cortical ``measure``.

    For every hemisphere/region pair the model ``measure ~ dx_group`` with
    covariates ``AGE_AT_VISIT`` and ``SEX`` is fitted with ``pingouin.ancova``
    and the F statistic and uncorrected p-value (``p-unc``) of the group
    effect are stored. The result is written to
    ``ancova_baseline_{measure}.csv`` in ``output_dir``.

    All clinical variables (group and covariates) are taken from
    ``clinical_df`` in a single merge, so the function no longer depends on
    the module-level cohort table for part of its inputs.

    Every non-identifier column of the hemisphere tables is treated as a
    region. Whole-brain columns present in both tables (e.g.
    ``BrainSegVolNotVent``, ``eTIV`` in the outputs of this notebook)
    therefore produce identical rows for ``lh`` and ``rh``.

    Parameters
    ----------
    measure : str
        Measure tag passed to ``read_measure`` (the notebook uses
        ``"volume"``, ``"thickness"`` and ``"area"``).
    clinical_df : pandas.DataFrame
        Must provide ``first_visit``, ``dx_group``, ``AGE_AT_VISIT`` and
        ``SEX``. A ``first_visit`` value occurring several times would
        duplicate the matching measurement rows in the merge.
    force : bool
        When false and the result CSV already exists, the CSV is loaded and
        returned without recomputation.

    Returns
    -------
    pandas.DataFrame
        Columns ``hemisphere``, ``region``, ``F`` and ``pval``.
    """
    filename = output_dir / f"ancova_baseline_{measure}.csv"
    if not force and filename.exists():
        return pd.read_csv(filename)

    covariates = ["AGE_AT_VISIT", "SEX"]

    # Wide hemisphere tables -> one row per (first_visit, hemi, region).
    long_df = read_measure(measure).melt(
        id_vars=["first_visit", "hemi"], var_name="region", value_name=measure
    )
    # Single inner join: only the clinical columns the model needs are merged,
    # which also avoids suffix clashes with unrelated clinical columns.
    df = long_df.merge(
        clinical_df[["first_visit", "dx_group", *covariates]], on="first_visit"
    )

    # Collect plain records and build the frame once instead of growing a
    # DataFrame row by row.
    records = []
    regions = df["region"].unique()
    for hemi in df["hemi"].unique():
        hemi_df = df[df["hemi"] == hemi]
        for region in regions:
            df_region = hemi_df[hemi_df["region"] == region]
            ancova = pg.ancova(
                data=df_region,
                dv=measure,
                between="dx_group",
                covar=covariates,
            )
            # Row 0 is taken to be the dx_group effect (pingouin's table
            # layout; re-check if the library version changes).
            records.append(
                {
                    "hemisphere": hemi,
                    "region": region,
                    "F": ancova["F"].values[0],
                    "pval": ancova["p-unc"].values[0],
                }
            )

    ancova_df = pd.DataFrame(records, columns=["hemisphere", "region", "F", "pval"])
    ancova_df.to_csv(filename, index=False)

    return ancova_df

In [16]:
# force=True: always recompute and overwrite the ancova_baseline_*.csv files.
ancova_volume, ancova_thickness, ancova_area = (
    compute_ancova(cortical_measure, df_clinical, force=True)
    for cortical_measure in ("volume", "thickness", "area")
)

In [17]:
# Cortical volume: rows with uncorrected p < 0.05, largest F first.
ancova_volume.loc[ancova_volume["pval"].lt(0.05)].sort_values(by="F", ascending=False)

Unnamed: 0,hemisphere,region,F,pval
37,rh,caudalanteriorcingulate,20.107816,1.2e-05
61,rh,rostralmiddlefrontal,16.725103,6.3e-05
34,lh,BrainSegVolNotVent,13.2557,0.000347
70,rh,BrainSegVolNotVent,13.2557,0.000347
21,lh,posteriorcingulate,11.923355,0.000678
60,rh,rostralanteriorcingulate,10.790367,0.001208
18,lh,parstriangularis,10.702139,0.001263
57,rh,posteriorcingulate,10.52458,0.001384
53,rh,parsorbitalis,10.293435,0.001558
54,rh,parstriangularis,10.102775,0.001719


In [18]:
# Cortical thickness: rows with uncorrected p < 0.05, largest F first.
ancova_thickness.loc[ancova_thickness["pval"].lt(0.05)].sort_values(
    by="F", ascending=False
)

Unnamed: 0,hemisphere,region,F,pval
35,lh,BrainSegVolNotVent,13.2557,0.000347
72,rh,BrainSegVolNotVent,13.2557,0.000347
15,lh,paracentral,9.567284,0.002268
10,lh,lateralorbitofrontal,6.465934,0.011765
4,lh,entorhinal,6.36598,0.012423
41,rh,entorhinal,6.292856,0.012929
36,lh,eTIV,5.766296,0.017266
73,rh,eTIV,5.766296,0.017266
51,rh,parahippocampal,5.163311,0.024149
22,lh,precentral,5.138073,0.024493


In [19]:
# Cortical surface area: rows with uncorrected p < 0.05, largest F first.
ancova_area.loc[ancova_area["pval"].lt(0.05)].sort_values(by="F", ascending=False)

Unnamed: 0,hemisphere,region,F,pval
38,rh,caudalanteriorcingulate,19.466272,1.7e-05
62,rh,rostralmiddlefrontal,14.455424,0.000191
35,lh,BrainSegVolNotVent,13.2557,0.000347
72,rh,BrainSegVolNotVent,13.2557,0.000347
55,rh,parstriangularis,9.908215,0.001901
21,lh,posteriorcingulate,9.480505,0.002373
18,lh,parstriangularis,9.108959,0.00288
16,lh,parsopercularis,8.874935,0.003255
58,rh,posteriorcingulate,7.392372,0.007135
61,rh,rostralanteriorcingulate,7.16758,0.008049


## Subcortical Volume

In [20]:
# Subcortical volumes: one ANCOVA per column of the aseg table, with the same
# model as the cortical analysis (dx_group effect, AGE_AT_VISIT and SEX as
# covariates).
filename = root_dir / "table_ieee" / "aseg.volume.tsv"
df = (
    pd.read_csv(filename, sep="\t")
    .rename(columns={"Measure:volume": "first_visit"})
    # Wide table -> one row per (first_visit, region).
    .melt(id_vars=["first_visit"], var_name="region", value_name="volume")
    .merge(df_clinical, on="first_visit", suffixes=("", "_clinical"))
)
df = df[["first_visit", "region", "volume", "dx_group", "AGE_AT_VISIT", "SEX"]]

# Collect one record per region and build the result frame once, instead of
# growing a DataFrame row by row inside the loop.
subcortical_records = []
for region in tqdm(df["region"].unique()):
    df_region = df[df["region"] == region]
    ancova = pg.ancova(
        data=df_region, dv="volume", between="dx_group", covar=["AGE_AT_VISIT", "SEX"]
    )
    # Row 0 is taken to be the dx_group effect; "p-unc" is the uncorrected p-value.
    subcortical_records.append(
        {
            "region": region,
            "F": ancova["F"].values[0],
            "pval": ancova["p-unc"].values[0],
        }
    )

ancova_subcortical_volume_df = pd.DataFrame(
    subcortical_records, columns=["region", "F", "pval"]
)

filename = output_dir / "ancova_baseline_subcortical_volume.csv"
ancova_subcortical_volume_df.to_csv(filename, index=False)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 64/64 [00:00<00:00, 94.61it/s]


In [21]:
# Subcortical volume: rows with uncorrected p < 0.05, largest F first.
ancova_subcortical_volume_df.loc[
    ancova_subcortical_volume_df["pval"].lt(0.05)
].sort_values(by="F", ascending=False)

Unnamed: 0,region,F,pval
25,Right-Pallidum,16.892373,5.8e-05
20,Right-Cerebellum-White-Matter,16.365274,7.5e-05
3,Left-Cerebellum-Cortex,16.328192,7.6e-05
29,Right-VentralDC,14.008334,0.000239
21,Right-Cerebellum-Cortex,13.441176,0.000317
46,BrainSegVolNotVent,13.2557,0.000347
54,TotalGrayVol,12.089286,0.000624
2,Left-Cerebellum-White-Matter,12.024421,0.000645
53,SubCortGrayVol,11.385603,0.000891
15,Left-VentralDC,11.34386,0.00091
