# Partial correlation at baseline (IEEE)

## Purpose

Compute the partial correlation between each structural metric and the UPDRS score at baseline, controlling for age and sex.
The metrics are cortical thickness, cortical area, cortical volume, and subcortical volume.

## Definition

Pingouin [method](https://pingouin-stats.org/build/html/generated/pingouin.partial_corr.html#pingouin.partial_corr)

Partial correlation [1] measures the degree of association between x and y, after removing the effect of one or more controlling variables (covar or $Z$). Practically, this is achieved by calculating the correlation coefficient between the residuals of two linear regressions:

$$x \sim Z, y \sim Z$$

Like the correlation coefficient, the partial correlation coefficient takes on a value in the range from -1 to 1, where 1 indicates a perfect positive association.

The semipartial correlation is similar to the partial correlation, with the exception that the set of controlling variables is only removed for either x or y, but not both.

Pingouin uses the method described in [2] to calculate the (semi)partial correlation coefficients and associated p-values. This method is based on the inverse covariance matrix and is significantly faster than the traditional regression-based method. Results have been tested against the ppcor R package.

## Get info about subjects

In [10]:
import pandas as pd
import pingouin as pg
import os
from pathlib import Path

# Suppress noisy warning categories so the rendered notebook stays readable.
# NOTE(review): these filters are process-wide and silence every UserWarning,
# FutureWarning and RuntimeWarning, including any raised by this notebook's
# own code; consider narrowing them to specific messages or modules.
import warnings

warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
# Message-based filter: drops any warning whose text matches ".*column_view.*".
warnings.filterwarnings("ignore", message=".*column_view.*")

# When truthy, paths shown in the notebook output are masked by anondir().
anonymizer = True

# Working directory the notebook is launched from; all other paths hang off it.
root_dir = Path.cwd()


def anondir(path: Path, prefix=root_dir) -> Path:
    """Return ``path`` with the ``prefix`` portion masked as ``<living-park>``.

    The masking is a plain string substitution of ``str(prefix)`` inside
    ``str(path)``. When the module-level ``anonymizer`` flag is falsy, the
    path is handed back untouched.
    """
    if anonymizer:
        masked = str(path).replace(str(prefix), "<living-park>")
        return Path(masked)
    return path


# Report the (anonymized) working directory.
display(f"Running in root dir: {anondir(root_dir)}")

# Statistics directory: <root>/stats_QCed/sampled.
stats_dir = root_dir / "stats_QCed" / "sampled"
print(f"Stats directory: {anondir(stats_dir)}")

# Result CSVs are written to <root>/partial_correlation_ieee, created on demand.
output_dir = root_dir.joinpath("partial_correlation_ieee")
output_dir.mkdir(parents=True, exist_ok=True)
print(f"Output directory: {anondir(output_dir)}")

'Running in root dir: <living-park>'

Stats directory: <living-park>/stats_QCed/sampled
Output directory: <living-park>/partial_correlation_ieee


In [11]:
def get_cohort_stats():
    """Load the longitudinal cohort and return the PD-non-MCI clinical table.

    Reads ``cohort/longitudinal_cohort_qced.csv`` under ``root_dir``, prints
    subject counts for the cohort as stored in the file, then keeps only the
    ``PD-non-MCI`` rows and exposes ``NP3TOT`` under the name ``UPDRS``.

    Returns:
        pandas.DataFrame: PD-non-MCI rows restricted to the columns
        ``PATNO``, ``first_visit``, ``second_visit``, ``dx_group``, ``SEX``,
        ``AGE_AT_VISIT`` and ``UPDRS``.

    Raises:
        KeyError: if one of the expected columns is absent from the CSV.
    """
    filename = root_dir / "cohort" / "longitudinal_cohort_qced.csv"
    df_cohort = pd.read_csv(filename)
    # Go through anondir() so the log does not leak the user-specific prefix.
    print(f"Load cohort stats: {anondir(filename)}")
    columns = [
        "PATNO",
        "first_visit",
        "second_visit",
        "dx_group",
        "SEX",
        "AGE_AT_VISIT",
        "UPDRS",
    ]

    # Count subjects BEFORE filtering: counting afterwards always reports
    # zero for every group other than PD-non-MCI.
    is_pd = df_cohort["dx_group"] == "PD-non-MCI"
    is_hc = df_cohort["dx_group"] == "HC"
    print(f"Number of PD-non-MCI subjects: {df_cohort.loc[is_pd, 'PATNO'].nunique()}")
    print(f"Number of HC subjects: {df_cohort.loc[is_hc, 'PATNO'].nunique()}")
    print(f"Total number of subjects: {df_cohort['PATNO'].nunique()}")

    # rename() returns a new frame, avoiding an in-place edit of a filtered slice.
    df_pd = df_cohort.loc[is_pd].rename(columns={"NP3TOT": "UPDRS"})
    return df_pd[columns]


df_clinical = get_cohort_stats()

Load cohort stats: /mnt/lustre/ychatel/living-park/VIP-python-client/example/freesurfer-fuzzy/cohort/longitudinal_cohort_qced.csv
Number of PD-non-MCI subjects: 112
Number of HC subjects: 0
Total number of subjects: 112


## Partial correlation

In [12]:
def read_table(hemi, measure):
    """Load one hemisphere's aparc table for ``measure`` with cleaned headers.

    The file ``table_ieee/{hemi}.aparc.{measure}.tsv`` under ``root_dir`` is
    read as tab-separated values. A ``hemi`` column is added, then every
    occurrence of ``"{hemi}."``, ``"{hemi}_"`` and ``"_{measure}"`` is removed
    from the column names (in that order), and the column then named
    ``aparc.{measure}`` is renamed to ``first_visit``.
    """
    table = pd.read_csv(
        root_dir / "table_ieee" / f"{hemi}.aparc.{measure}.tsv", sep="\t"
    )
    table["hemi"] = hemi

    def _strip(name):
        # Plain substring removal, applied in the same order to every header.
        for token in (f"{hemi}.", f"{hemi}_", f"_{measure}"):
            name = name.replace(token, "")
        return name

    table.columns = [_strip(name) for name in table.columns]
    return table.rename(columns={f"aparc.{measure}": "first_visit"})


def read_measure(measure):
    """Stack the left- and right-hemisphere tables of ``measure`` row-wise."""
    per_hemisphere = [read_table(hemi, measure) for hemi in ("lh", "rh")]
    return pd.concat(per_hemisphere, axis=0)


def get_baseline_pcorr(metric):
    """Build the long-format table of ``metric`` joined with clinical data.

    The wide per-hemisphere tables are melted to one row per
    (``first_visit``, ``hemi``, ``region``) with the value stored in a column
    named after ``metric``, then inner-joined with the module-level
    ``df_clinical`` on ``first_visit``.

    Returns:
        pandas.DataFrame: columns ``first_visit``, ``hemi``, ``region``,
        ``metric``, ``dx_group``, ``AGE_AT_VISIT`` and ``SEX``.
    """
    wide = read_measure(metric)
    long = wide.melt(
        id_vars=["first_visit", "hemi"], var_name="region", value_name=metric
    )
    joined = long.merge(df_clinical, on="first_visit")
    kept = ["first_visit", "hemi", "region", metric, "dx_group", "AGE_AT_VISIT", "SEX"]
    return joined[kept]

### Cortical

In [13]:
def compute_partial_correlation(metric, clinical_df, force=False):
    """Partial correlation of ``metric`` with UPDRS per region and hemisphere.

    For each (hemisphere, region) pair, the Pearson partial correlation
    between ``metric`` and ``UPDRS`` is computed with ``AGE_AT_VISIT`` and
    ``SEX`` as covariates. The result is written to
    ``partial_correlation_baseline_{metric}.csv`` in ``output_dir`` and
    returned.

    Args:
        metric: Name of the measure; passed to ``get_baseline_pcorr`` to load
            the tables and used as the ``x`` column.
        clinical_df: Clinical table joined on ``first_visit``; it supplies
            the ``UPDRS`` column.
        force: Unused; kept so existing callers keep working.

    Returns:
        pandas.DataFrame: one row per (region, hemisphere) with columns
        ``region``, ``hemisphere``, ``r``, ``p-val`` and ``n``.
    """
    baseline_df = get_baseline_pcorr(metric).merge(
        clinical_df,
        on="first_visit",
        # Columns present on both sides keep the left-hand (baseline) values.
        suffixes=("", "_clinical"),
    )
    baseline_df = baseline_df[
        ["first_visit", "region", metric, "hemi", "AGE_AT_VISIT", "SEX", "UPDRS"]
    ]

    columns = ["region", "hemisphere", "r", "p-val", "n"]
    regions = baseline_df["region"].unique()

    # Collect plain records and build the frame once, instead of growing a
    # DataFrame one row at a time.
    records = []
    for hemi in ("lh", "rh"):
        in_hemi = baseline_df[baseline_df["hemi"] == hemi]
        for region in regions:
            pc = pg.partial_corr(
                data=in_hemi[in_hemi["region"] == region],
                x=metric,
                y="UPDRS",
                covar=["AGE_AT_VISIT", "SEX"],
                method="pearson",
            )
            # Take the single result row by position: `pc[col][0]` relies on
            # pandas' deprecated integer-key fallback when the index is not
            # integer-based.
            records.append(
                (
                    region,
                    hemi,
                    pc["r"].iloc[0],
                    pc["p-val"].iloc[0],
                    pc["n"].iloc[0],
                )
            )

    partial_correlation_df = pd.DataFrame(records, columns=columns)

    filename = output_dir / f"partial_correlation_baseline_{metric}.csv"
    partial_correlation_df.to_csv(filename, index=False)

    return partial_correlation_df

In [14]:
# Run the cortical analysis once per aparc measure; each call also writes its CSV.
pcorr_thickness = compute_partial_correlation(metric="thickness", clinical_df=df_clinical)
pcorr_area = compute_partial_correlation(metric="area", clinical_df=df_clinical)
pcorr_volume = compute_partial_correlation(metric="volume", clinical_df=df_clinical)

In [15]:
# Cortical thickness: rows whose stored p-value is below 0.05.
pcorr_thickness.loc[pcorr_thickness["p-val"] < 0.05]

Unnamed: 0,region,hemisphere,r,p-val,n
5,fusiform,lh,-0.206074,0.030782,112
6,inferiorparietal,lh,-0.197429,0.038697,112
9,lateraloccipital,lh,-0.190904,0.045742,112
19,pericalcarine,lh,-0.26658,0.004876,112
20,postcentral,lh,-0.300517,0.001423,112
22,precentral,lh,-0.241495,0.011034,112
27,superiorparietal,lh,-0.23404,0.01386,112
28,superiortemporal,lh,-0.215173,0.023978,112
29,supramarginal,lh,-0.25438,0.007324,112
34,MeanThickness,lh,-0.218221,0.022007,112


In [16]:
# Cortical area: rows whose stored p-value is below 0.05.
pcorr_area.loc[pcorr_area["p-val"] < 0.05]

Unnamed: 0,region,hemisphere,r,p-val,n
7,inferiortemporal,lh,-0.216926,0.022826,112
21,posteriorcingulate,lh,-0.226854,0.017161,112
43,inferiorparietal,rh,-0.212418,0.025887,112
48,lingual,rh,0.195647,0.040525,112


In [17]:
# Cortical volume: rows whose stored p-value is below 0.05.
pcorr_volume.loc[pcorr_volume["p-val"] < 0.05]

Unnamed: 0,region,hemisphere,r,p-val,n
5,fusiform,lh,-0.187863,0.049374,112
7,inferiortemporal,lh,-0.22864,0.016283,112
20,postcentral,lh,-0.216681,0.022985,112
21,posteriorcingulate,lh,-0.310625,0.000957,112
28,superiortemporal,lh,-0.190849,0.045807,112
41,fusiform,rh,-0.22963,0.015813,112
42,inferiorparietal,rh,-0.271107,0.004173,112
69,insula,rh,-0.200212,0.035982,112


### Subcortical Volume


In [18]:
# Subcortical volumes: partial correlation of each aseg column with UPDRS,
# controlling for age and sex.
filename = root_dir / "table_ieee" / "aseg.volume.tsv"
df = pd.read_csv(filename, sep="\t")
df = df.rename(columns={"Measure:volume": "first_visit"})
# Long format: one row per (first_visit, region) with the value in "volume".
# NOTE(review): every non-id column of the table is treated as a region,
# including any global measures it may contain; confirm this is intended.
df = df.melt(id_vars=["first_visit"], var_name="region", value_name="volume")
df = df.merge(
    df_clinical,
    on="first_visit",
    suffixes=("", "_clinical"),
)
df = df[["first_visit", "region", "volume", "dx_group", "AGE_AT_VISIT", "SEX", "UPDRS"]]

columns = ["region", "r", "p-val", "n"]

# Collect plain records and build the frame once, instead of growing a
# DataFrame one row at a time.
records = []
for region in df["region"].unique():
    data = df[df["region"] == region]
    pc = pg.partial_corr(
        data=data,
        x="volume",
        y="UPDRS",
        covar=["AGE_AT_VISIT", "SEX"],
        method="pearson",
    )
    # Take the single result row by position: `pc[col][0]` relies on pandas'
    # deprecated integer-key fallback when the index is not integer-based.
    records.append((region, pc["r"].iloc[0], pc["p-val"].iloc[0], pc["n"].iloc[0]))

partial_correlation_df = pd.DataFrame(records, columns=columns)

filename = output_dir / "partial_correlation_baseline_subcortical_volume.csv"
partial_correlation_df.to_csv(filename, index=False)

# Rows whose stored p-value is below 0.05.
partial_correlation_df[partial_correlation_df["p-val"] < 0.05]

Unnamed: 0,region,r,p-val,n
5,Left-Caudate,-0.194699,0.041526,112
6,Left-Putamen,-0.242001,0.010862,112
8,3rd-Ventricle,-0.212748,0.025652,112
24,Right-Putamen,-0.23654,0.012849,112
26,Right-Hippocampus,-0.19957,0.036594,112
33,WM-hypointensities,0.212364,0.025926,112
62,SurfaceHoles,0.188365,0.048758,112
