# IEEE

In [11]:
import pandas as pd
import pingouin as pg
import os
from pathlib import Path
from tqdm import tqdm

# suppress warnings
import warnings

# Silence noisy warning categories for the whole notebook.
# NOTE(review): RuntimeWarning is muted as well, which can hide numerical
# problems raised while computing the statistics below — consider narrowing.
for _muted_category in (UserWarning, FutureWarning, RuntimeWarning):
    warnings.filterwarnings("ignore", category=_muted_category)
warnings.filterwarnings("ignore", message=".*column_view.*")

# When True, anondir() swaps the project root for a placeholder in printed paths.
anonymizer = True

# Every input and output location below is resolved from the working directory.
root_dir = Path.cwd()


def anondir(path: Path, prefix=root_dir) -> Path:
    """Hide the project root in a path before it is printed.

    When the module-level ``anonymizer`` flag is truthy, every occurrence of
    ``str(prefix)`` inside ``str(path)`` is replaced by the literal
    ``"<living-park>"`` and the result is wrapped in a ``Path``. When the flag
    is falsy, ``path`` is returned untouched.

    Args:
        path: Path to anonymize.
        prefix: Text to hide; defaults to ``root_dir`` as it was when this
            function was defined.

    Returns:
        The anonymized ``Path``, or ``path`` itself when anonymization is off.
    """
    if anonymizer:
        # Plain substring replacement: the prefix is hidden wherever it occurs.
        return Path(str(path).replace(str(prefix), "<living-park>"))
    return path


# Report the (anonymized) working locations and make sure the output folder exists.
# print() is used throughout: display() on a plain string renders its quoted
# repr and is only available as an IPython-injected name.
print(f"Running in root dir: {anondir(root_dir)}")
stats_dir = root_dir / "stats_QCed" / "sampled"
print(f"Stats directory: {anondir(stats_dir)}")

# ANCOVA result tables are written here; creating it again is a no-op.
output_dir = root_dir / "ancova_ieee"
output_dir.mkdir(parents=True, exist_ok=True)
print(f"Output directory: {anondir(output_dir)}")

'Running in root dir: <living-park>'

Stats directory: <living-park>/stats_QCed/sampled
Output directory: <living-park>/ancova_ieee


In [10]:
def get_cohort_stats():
    """Load the QC'ed longitudinal cohort table and return its clinical columns.

    Reads ``cohort/longitudinal_cohort_qced.csv`` under ``root_dir``, renames the
    ``NP3TOT`` column to ``UPDRS``, prints the number of unique ``PATNO`` values
    per diagnostic group and overall, and returns a fixed subset of columns.

    Returns:
        DataFrame with the columns ``PATNO``, ``first_visit``, ``second_visit``,
        ``dx_group``, ``SEX``, ``AGE_AT_VISIT`` and ``UPDRS``.
    """
    filename = root_dir / "cohort" / "longitudinal_cohort_qced.csv"
    df_clinical = pd.read_csv(filename).rename(columns={"NP3TOT": "UPDRS"})
    # Route the path through anondir() like every other path printed in this
    # notebook, so the user-specific absolute location is not written to the output.
    print(f"Load cohort stats: {anondir(filename)}")
    columns = [
        "PATNO",
        "first_visit",
        "second_visit",
        "dx_group",
        "SEX",
        "AGE_AT_VISIT",
        "UPDRS",
    ]

    # Subjects are counted by unique PATNO, not by row.
    n_pd = df_clinical.loc[df_clinical["dx_group"] == "PD-non-MCI", "PATNO"].nunique()
    n_hc = df_clinical.loc[df_clinical["dx_group"] == "HC", "PATNO"].nunique()
    print(f"Number of PD-non-MCI subjects: {n_pd}")
    print(f"Number of HC subjects: {n_hc}")
    print(f"Total number of subjects: {df_clinical['PATNO'].nunique()}")
    return df_clinical[columns]


# Clinical covariates shared by every ANCOVA cell below.
df_clinical = get_cohort_stats()

Load cohort stats: <living-park>/cohort/longitudinal_cohort_qced.csv
Number of PD-non-MCI subjects: 112
Number of HC subjects: 89
Total number of subjects: 201


# ANCOVA

## Cortical

In [3]:
def read_table(hemi, measure):
    """Read one hemisphere's cortical table and normalize its column names.

    Loads the tab-separated file ``table_ieee/{hemi}.aparc.{measure}.tsv`` under
    ``root_dir`` (presumably a FreeSurfer aparc stats table — confirm), tags
    every row with the hemisphere, and removes the ``"{hemi}."``, ``"{hemi}_"``
    and ``"_{measure}"`` substrings from each column name so both hemispheres
    share the same labels. The ``aparc.{measure}`` column is renamed to
    ``first_visit``.

    Args:
        hemi: Hemisphere label used in the file name and column names.
        measure: Measure name used in the file name and column names.

    Returns:
        DataFrame with a ``first_visit`` column, a ``hemi`` column and one
        column per remaining table field.
    """
    table = pd.read_csv(
        root_dir / "table_ieee" / f"{hemi}.aparc.{measure}.tsv", sep="\t"
    )
    table["hemi"] = hemi

    # Apply the three substring removals to each column name, in the same order.
    cleaned_columns = []
    for column in table.columns:
        for token in (f"{hemi}.", f"{hemi}_", f"_{measure}"):
            column = column.replace(token, "")
        cleaned_columns.append(column)
    table.columns = cleaned_columns

    return table.rename(columns={f"aparc.{measure}": "first_visit"})


def read_measure(measure):
    """Stack the left- and right-hemisphere tables for ``measure``.

    Returns:
        The ``"lh"`` rows followed by the ``"rh"`` rows, as produced by
        ``read_table``; the original row indices are kept (not reset).
    """
    hemisphere_tables = [read_table(hemi, measure) for hemi in ("lh", "rh")]
    return pd.concat(hemisphere_tables, axis=0)


def get_baseline_ancova(metric):
    """Build the long-format table of ``metric`` joined with clinical data.

    The wide per-hemisphere tables are melted to one row per
    (``first_visit``, ``hemi``, ``region``) and merged on ``first_visit`` with
    the module-level ``df_clinical`` using pandas' default merge.

    Args:
        metric: Measure name passed to ``read_measure``; also the name of the
            value column in the result.

    Returns:
        DataFrame with the columns ``first_visit``, ``hemi``, ``region``,
        ``metric``, ``dx_group`` and ``AGE_AT_VISIT``.
    """
    long_table = read_measure(metric).melt(
        id_vars=["first_visit", "hemi"], var_name="region", value_name=metric
    )
    with_clinical = long_table.merge(df_clinical, on="first_visit")
    kept_columns = ["first_visit", "hemi", "region", metric, "dx_group", "AGE_AT_VISIT"]
    return with_clinical[kept_columns]

In [4]:
import pingouin as pg


def compute_ancova(measure, clinical_df, force):
    """Run a per-region baseline ANCOVA of ``measure`` between diagnostic groups.

    For each hemisphere and each region, fits ``pg.ancova`` with ``measure`` as
    the dependent variable, ``dx_group`` as the between-subject factor and
    ``AGE_AT_VISIT`` and ``SEX`` as covariates. Results are written to
    ``ancova_baseline_{measure}.csv`` in ``output_dir``.

    Args:
        measure: Cortical measure name understood by ``read_measure``.
        clinical_df: Clinical table providing ``first_visit``, ``dx_group``,
            ``AGE_AT_VISIT`` and ``SEX``. All group and covariate values are
            taken from this argument.
        force: When falsy and the result CSV already exists, the CSV is loaded
            and returned instead of recomputing.

    Returns:
        DataFrame with the columns ``hemisphere``, ``region``, ``F`` and
        ``pval`` (the uncorrected p-value), one row per hemisphere/region pair.
    """
    filename = output_dir / f"ancova_baseline_{measure}.csv"
    if not force and filename.exists():
        return pd.read_csv(filename)

    covariates = ["AGE_AT_VISIT", "SEX"]

    # Join the clinical data exactly once, from the clinical_df argument.
    # Merging a second time on top of an already-merged table would take
    # dx_group/AGE_AT_VISIT from one source and SEX from another, and would
    # multiply rows if first_visit were ever duplicated.
    long_df = read_measure(measure).melt(
        id_vars=["first_visit", "hemi"], var_name="region", value_name=measure
    )
    df = long_df.merge(
        clinical_df[["first_visit", "dx_group", *covariates]], on="first_visit"
    )

    # Collect one record per test and build the frame once at the end rather
    # than growing a DataFrame row by row.
    records = []
    for hemi in df["hemi"].unique():
        for region in df["region"].unique():
            df_region = df[(df["hemi"] == hemi) & (df["region"] == region)]
            ancova = pg.ancova(
                data=df_region,
                dv=measure,
                between="dx_group",
                covar=covariates,
            )
            # Row 0 of the pingouin table is read as the dx_group effect
            # (between factor listed first) — see the pingouin documentation.
            records.append(
                {
                    "hemisphere": hemi,
                    "region": region,
                    "F": ancova["F"].values[0],
                    "pval": ancova["p-unc"].values[0],
                }
            )

    ancova_df = pd.DataFrame(records, columns=["hemisphere", "region", "F", "pval"])
    ancova_df.to_csv(filename, index=False)

    return ancova_df

In [5]:
# Recompute all three cortical ANCOVA tables; force=True bypasses any cached CSV.
ancova_volume, ancova_thickness, ancova_area = (
    compute_ancova(measure, df_clinical, force=True)
    for measure in ("volume", "thickness", "area")
)

In [6]:
# Volume results with uncorrected p < 0.05, largest F first.
ancova_volume.loc[ancova_volume["pval"] < 0.05].sort_values(by="F", ascending=False)

Unnamed: 0,hemisphere,region,F,pval
37,rh,caudalanteriorcingulate,21.440099,7e-06
61,rh,rostralmiddlefrontal,16.647764,6.6e-05
34,lh,BrainSegVolNotVent,14.239774,0.000214
70,rh,BrainSegVolNotVent,14.239774,0.000214
21,lh,posteriorcingulate,11.978911,0.000662
18,lh,parstriangularis,11.649638,0.000782
57,rh,posteriorcingulate,11.519653,0.000835
60,rh,rostralanteriorcingulate,11.235996,0.000964
54,rh,parstriangularis,10.742185,0.001241
53,rh,parsorbitalis,10.211387,0.001629


In [None]:
# Thickness results with uncorrected p < 0.05, largest F first.
ancova_thickness.loc[ancova_thickness["pval"] < 0.05].sort_values(
    by="F", ascending=False
)

Unnamed: 0,hemisphere,region,F,pval
35,lh,BrainSegVolNotVent,14.239774,0.000214
72,rh,BrainSegVolNotVent,14.239774,0.000214
15,lh,paracentral,9.254737,0.002674
10,lh,lateralorbitofrontal,6.999684,0.008821
73,rh,eTIV,6.551243,0.011243
36,lh,eTIV,6.551243,0.011243
4,lh,entorhinal,6.124535,0.01419
41,rh,entorhinal,5.804497,0.01692
22,lh,precentral,4.865291,0.028577
51,rh,parahippocampal,4.801788,0.029621


In [8]:
# Surface-area results with uncorrected p < 0.05, largest F first.
ancova_area.loc[ancova_area["pval"] < 0.05].sort_values(by="F", ascending=False)

Unnamed: 0,hemisphere,region,F,pval
38,rh,caudalanteriorcingulate,20.408662,1.1e-05
35,lh,BrainSegVolNotVent,14.239774,0.000214
72,rh,BrainSegVolNotVent,14.239774,0.000214
62,rh,rostralmiddlefrontal,14.128947,0.000226
55,rh,parstriangularis,10.329402,0.001533
18,lh,parstriangularis,10.093359,0.001732
21,lh,posteriorcingulate,9.650263,0.002177
16,lh,parsopercularis,8.860224,0.003286
58,rh,posteriorcingulate,8.081905,0.00495
61,rh,rostralanteriorcingulate,7.11447,0.008292


## Subcortical Volume

In [12]:
# Baseline ANCOVA on the subcortical (aseg) volumes: one test per table column,
# dx_group as the between factor, AGE_AT_VISIT and SEX as covariates.
filename = root_dir / "table_ieee" / "aseg.volume.tsv"
df = pd.read_csv(filename, sep="\t").rename(columns={"Measure:volume": "first_visit"})
# Long format: one row per (first_visit, region).
df = df.melt(id_vars=["first_visit"], var_name="region", value_name="volume")
df = pd.merge(
    df,
    df_clinical,
    left_on="first_visit",
    right_on="first_visit",
    suffixes=("", "_clinical"),
)
df = df[["first_visit", "region", "volume", "dx_group", "AGE_AT_VISIT", "SEX"]]

# Collect one record per region and build the result frame once, instead of
# growing a DataFrame row by row inside the loop.
subcortical_records = []
for region in tqdm(df["region"].unique()):
    df_region = df[df["region"] == region]
    ancova = pg.ancova(
        data=df_region, dv="volume", between="dx_group", covar=["AGE_AT_VISIT", "SEX"]
    )
    # Row 0 of the pingouin table is read as the dx_group effect
    # (between factor listed first) — see the pingouin documentation.
    (F, pval) = ancova["F"].values[0], ancova["p-unc"].values[0]
    subcortical_records.append({"region": region, "F": F, "pval": pval})

ancova_subcortical_volume_df = pd.DataFrame(
    subcortical_records, columns=["region", "F", "pval"]
)

filename = output_dir / "ancova_baseline_subcortical_volume.csv"
ancova_subcortical_volume_df.to_csv(filename, index=False)

In [10]:
# Subcortical volume results with uncorrected p < 0.05, largest F first.
ancova_subcortical_volume_df.loc[
    ancova_subcortical_volume_df["pval"] < 0.05
].sort_values(by="F", ascending=False)

Unnamed: 0,region,F,pval
3,Left-Cerebellum-Cortex,18.105763,3.2e-05
25,Right-Pallidum,17.169696,5.1e-05
20,Right-Cerebellum-White-Matter,17.163566,5.1e-05
21,Right-Cerebellum-Cortex,15.265974,0.000129
29,Right-VentralDC,14.713701,0.000169
46,BrainSegVolNotVent,14.239774,0.000214
26,Right-Hippocampus,13.625621,0.00029
54,TotalGrayVol,13.294175,0.000342
2,Left-Cerebellum-White-Matter,13.00038,0.000396
53,SubCortGrayVol,12.615851,0.00048
