# Whole-brain Controllability, Aging, and TIA effects
This notebook contains the code for a re-analysis of controllability and aging, and of the effects that TIA has on brain aging.
- Also contains analysis for hub nodes

In [None]:
import math

import networkx as nx
import pandas as pd
import numpy as np
import importlib
import sys
import os

import matplotlib.pyplot as plt
import seaborn as sns
print(sys.version)

import Towlson_group_code.data_io as myFunc
importlib.reload(myFunc)

import PREVENT_functions as prev_fct
importlib.reload(prev_fct)

# --- Relative locations of the study artefacts ---
PICKLE_PATH = '../../PREVENT_Study/pickles/'
FIGURE_PATH = '../../PREVENT_Study/figures/'
DUMP_PATH = '../../PREVENT_Study/dump/'

# Scan time points: baseline, then the 1-, 3- and 5-year follow-ups.
TIMES = ['BL', 'Y1', 'Y3', 'Y5']

# One fixed colour per time point, paired with TIMES in order.
color_map = dict(zip(TIMES, ('#1f77b4', '#ff7f0e', '#2ca02c', '#d62728')))

# Subject metadata table plus the list of node names used throughout the notebook.
metadata, node_list = prev_fct.load_meta_data()
print(len(node_list))

# Connectomes keyed by cohort/time, then by subject id.
# NOTE(review): this goes through a pickle loader -- only load files from a trusted source.
individual_data = myFunc.load_from_pickle(PICKLE_PATH, 'Normalized_Connectomes.pkl')

In [None]:
from scipy.stats import mannwhitneyu

def get_ctrb_df(rois, use_rank = False):
    """Tabulate ROI-averaged controllability for every scan in ``individual_data``.

    Reads the notebook-level ``individual_data`` (cohort key -> {subject id: graph})
    and ``metadata`` objects.

    Parameters
    ----------
    rois
        Passed unchanged to ``prev_fct.get_avg_node_metric`` as the set of
        nodes to average over.
    use_rank : bool, default False
        When True, each graph is first passed through ``prev_fct.rank_nodes``
        for ``'avgCtrb'`` and ``'modCtrb'`` and the ``'avgCtrbRank'`` /
        ``'modCtrbRank'`` attributes are averaged instead of the raw metrics.

    Returns
    -------
    pandas.DataFrame
        One row per (cohort key, subject) with columns ``ID``, ``Time``,
        ``C/T``, ``Age``, ``avgCtrb`` and ``modCtrb``.
    """
    years_since_baseline = {'BL': 0, 'Y1': 1, 'Y3': 3, 'Y5': 5}
    attr_suffix = 'Rank' if use_rank else ''
    columns = {'ID': [], 'Time': [], 'C/T': [], 'Age': [], 'avgCtrb': [], 'modCtrb': []}

    for cohort_key, graphs_by_subject in individual_data.items():
        # Four-character keys carry a two-character group prefix; any other
        # key is split after its first character. The remainder is the time label.
        prefix_len = 2 if len(cohort_key) == 4 else 1
        group_label = cohort_key[:prefix_len]
        time_label = cohort_key[prefix_len:].upper()

        for subject_id, graph in graphs_by_subject.items():
            columns['C/T'].append(group_label)
            columns['ID'].append(subject_id)
            columns['Time'].append(time_label)
            # presumably metadata 'Age' is the age at baseline, hence the
            # follow-up offset in years -- TODO confirm
            columns['Age'].append(metadata.loc[subject_id]['Age'] + years_since_baseline[time_label])

            if use_rank:
                # Rank on both metrics before reading the '...Rank' attributes.
                for metric_name in ('avgCtrb', 'modCtrb'):
                    _, graph, _ = prev_fct.rank_nodes(graph, metric_name)

            columns['avgCtrb'].append(prev_fct.get_avg_node_metric(graph, rois, 'avgCtrb' + attr_suffix))
            columns['modCtrb'].append(prev_fct.get_avg_node_metric(graph, rois, 'modCtrb' + attr_suffix))

    return pd.DataFrame(data = columns)

def print_nan_columns(df):
    """Print a short report on the rows of ``df`` that contain missing values.

    Prints, in order: the number of rows with at least one NaN, those rows
    themselves, the total row count, and the row count after ``dropna()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. It is not modified.

    Returns
    -------
    None
    """
    # `axis` must be passed by keyword: the positional form `any(1)` is
    # rejected by pandas 2.x.
    has_nan = df.isnull().any(axis=1)
    # Select with the boolean mask (label-aligned). Feeding index labels to
    # `.iloc` only works when the index happens to be the default 0..n-1.
    nans = df.loc[has_nan]
    print(len(nans.index))
    print(nans)
    print("Number of data points before dropping NaN:", len(df.index))
    print("Number of data points AFTER dropping NaN:", len(df.dropna().index))

# Cross-sectional analysis
Whole-brain controllability changes. Whole brain controllability is defined as the average of all nodal controllability in the network.
- To prevent the problem of having non-independent datapoints, we will separate out baseline, 1-year, 3-year, and 5-year data based on color.
- Since this is not looking at rate of change, all available data points are used.
- $x$ = time (BL, Y1, Y3, Y5), $y$ = whole-brain controllability, color by group (Control vs TIA)

In [None]:
# Average over every node (whole brain) on the raw metric values, then give
# the two metric columns the display names used by the plots below.
whole_brain_ctrb_df = (
    get_ctrb_df(node_list, use_rank=False)
    .rename(columns={"avgCtrb": "Whole-brain Avg. Ctrb.", "modCtrb": "Whole-brain Modal Ctrb."})
)
whole_brain_ctrb_df.head(5)

#### Whole-brain Average Ctrb.
Apply the Mann–Whitney U test to see whether there are significant differences between the control and TIA groups at each time point.

In [None]:
# Two-sided Mann-Whitney U test (control vs TIA) on whole-brain average
# controllability, run separately at each time point; prints the p-value.
for time_group in TIMES:
    subset_df = whole_brain_ctrb_df.loc[whole_brain_ctrb_df["Time"] == time_group]
    is_control = subset_df["C/T"] == "HC"
    is_tia = subset_df["C/T"] == "P"
    control_subset = subset_df.loc[is_control]
    tia_subset = subset_df.loc[is_tia]

    test_result = mannwhitneyu(
        control_subset["Whole-brain Avg. Ctrb."],
        tia_subset["Whole-brain Avg. Ctrb."],
        alternative="two-sided",
        method='asymptotic',
    )
    p = test_result.pvalue
    print(time_group, p)

In [None]:
# Build a relabelled COPY for plotting. Assigning `plot_df = whole_brain_ctrb_df`
# only aliases the frame, so an in-place replace would rewrite the group codes
# in `whole_brain_ctrb_df` itself and every later filter on "HC" / "P" would
# come back empty.
plot_df = whole_brain_ctrb_df.assign(
    **{"C/T": whole_brain_ctrb_df["C/T"].replace({"HC": "Control", "P": "TIA"})}
)
# Box per time point, split by group; outlier markers hidden.
avg_ctrb_plot = sns.boxplot(data=plot_df, x="Time", y="Whole-brain Avg. Ctrb.", hue="C/T", showfliers=False)
# Put the legend outside the axes, to the right.
sns.move_legend(avg_ctrb_plot, "upper left", bbox_to_anchor=(1, 1))
plt.savefig(FIGURE_PATH+"/new analysis/Whole-brain Average Controllability.png", dpi=250, bbox_inches='tight')

BL

In [None]:
# Split the whole-brain table by group. The "C/T" column may hold either the
# raw codes ("HC" / "P") or the display labels used for plotting
# ("Control" / "TIA"), so accept both; matching only the raw codes can
# silently produce empty frames and therefore empty regressions.
wb_ctrb_hc_df = whole_brain_ctrb_df[whole_brain_ctrb_df["C/T"].isin(["HC", "Control"])]
wb_ctrb_tia_df = whole_brain_ctrb_df[whole_brain_ctrb_df["C/T"].isin(["P", "TIA"])]
# Baseline: controls in blue, TIA in red, drawn on the same axes.
sns.regplot(data=wb_ctrb_hc_df[wb_ctrb_hc_df['Time'] == "BL"], x="Age", y="Whole-brain Avg. Ctrb.", color="blue")
sns.regplot(data=wb_ctrb_tia_df[wb_ctrb_tia_df['Time'] == "BL"], x="Age", y="Whole-brain Avg. Ctrb.", color="red")

Y1

In [None]:
# 1-year follow-up: age vs whole-brain average controllability,
# controls in blue and TIA in red on the same axes.
hc_at_y1 = wb_ctrb_hc_df.loc[wb_ctrb_hc_df['Time'] == "Y1"]
tia_at_y1 = wb_ctrb_tia_df.loc[wb_ctrb_tia_df['Time'] == "Y1"]
sns.regplot(x="Age", y="Whole-brain Avg. Ctrb.", data=hc_at_y1, color="blue")
sns.regplot(x="Age", y="Whole-brain Avg. Ctrb.", data=tia_at_y1, color="red")

Y3

In [None]:
# 3-year follow-up: age vs whole-brain average controllability,
# controls in blue and TIA in red on the same axes.
hc_at_y3 = wb_ctrb_hc_df.loc[wb_ctrb_hc_df['Time'] == "Y3"]
tia_at_y3 = wb_ctrb_tia_df.loc[wb_ctrb_tia_df['Time'] == "Y3"]
sns.regplot(x="Age", y="Whole-brain Avg. Ctrb.", data=hc_at_y3, color="blue")
sns.regplot(x="Age", y="Whole-brain Avg. Ctrb.", data=tia_at_y3, color="red")

Y5

In [None]:
# 5-year follow-up: age vs whole-brain average controllability,
# controls in blue and TIA in red on the same axes.
hc_at_y5 = wb_ctrb_hc_df.loc[wb_ctrb_hc_df['Time'] == "Y5"]
tia_at_y5 = wb_ctrb_tia_df.loc[wb_ctrb_tia_df['Time'] == "Y5"]
sns.regplot(x="Age", y="Whole-brain Avg. Ctrb.", data=hc_at_y5, color="blue")
sns.regplot(x="Age", y="Whole-brain Avg. Ctrb.", data=tia_at_y5, color="red")

#### Whole-brain Modal Ctrb.

In [None]:
# Two-sided Mann-Whitney U test (control vs TIA) on whole-brain modal
# controllability, run separately at each time point; prints the p-value.
for time_group in TIMES:
    subset_df = whole_brain_ctrb_df[whole_brain_ctrb_df["Time"] == time_group]
    # The "C/T" column may hold either the raw codes ("HC" / "P") or the
    # display labels used for plotting ("Control" / "TIA"). Accept both so the
    # two samples are never silently empty.
    control_subset = subset_df[subset_df["C/T"].isin(["HC", "Control"])]
    tia_subset = subset_df[subset_df["C/T"].isin(["P", "TIA"])]

    _, p = mannwhitneyu(control_subset["Whole-brain Modal Ctrb."], tia_subset["Whole-brain Modal Ctrb."], method='asymptotic', alternative="two-sided")
    print(time_group, p)

In [None]:
# Build a relabelled COPY for plotting. Assigning `plot_df = whole_brain_ctrb_df`
# only aliases the frame, so an in-place replace would rewrite the group codes
# in `whole_brain_ctrb_df` itself and break later filters on "HC" / "P".
plot_df = whole_brain_ctrb_df.assign(
    **{"C/T": whole_brain_ctrb_df["C/T"].replace({"HC": "Control", "P": "TIA"})}
)
# Plot the relabelled copy (not the raw frame) so the legend shows
# "Control" / "TIA".
mod_ctrb_plot = sns.boxplot(data=plot_df, x="Time", y="Whole-brain Modal Ctrb.", hue="C/T",
                            showfliers=False)
# Put the legend outside the axes, to the right.
sns.move_legend(mod_ctrb_plot, "upper left", bbox_to_anchor=(1, 1))
plt.savefig(FIGURE_PATH+"/new analysis/Whole-brain Modal Controllability.png", dpi=250, bbox_inches='tight')

#### Results
- The boxplots show that there is a significant decrease in whole-brain modal ctrb. in the TIA group compared to controls at BL.
- However, over time there are no significant differences between the control and TIA groups at follow-up years 1, 3 and 5.
- Ask Emma: how to interpret linear regression lines for each follow up?

# Longitudinal analysis:
"Super" controllers
- Is there any correlation between the rate of change of a node's controllability and that node's baseline control rank?
- For this analysis, we assume linearity and only compare changes of 5-year to baseline.
- We calculate the rate based on the number of days between baseline and 5-year scan.

In [None]:
import math

def get_5_year_regional_ctrb(metric, ptype):
    """Per-node mean daily change (baseline -> 5-year) and mean baseline rank.

    Only subjects present in both ``individual_data[f'{ptype}y5']`` and
    ``individual_data[f'{ptype}bl']`` contribute. Reads the notebook-level
    ``individual_data``, ``metadata`` and ``node_list``; prints the number of
    subjects used.

    Parameters
    ----------
    metric : str
        Node attribute to compare (e.g. ``'avgCtrb'``); the baseline rank is
        read from the ``metric + 'Rank'`` attribute after
        ``prev_fct.rank_nodes`` has been applied to the baseline graph.
    ptype : str
        Cohort prefix used to build the ``individual_data`` keys.

    Returns
    -------
    tuple
        ``(mean_delta, mean_rank)``: both averaged over subjects along axis 0,
        with one entry per node in ``node_list`` order.
    """
    default_days_elapsed = 365*5
    y5_data = individual_data[f'{ptype}y5']
    bl_data = individual_data[f'{ptype}bl']

    daily_change_per_subject = []
    baseline_rank_per_subject = []
    for pid, y5_G in y5_data.items():
        if pid in bl_data:
            _, bl_G, _ = prev_fct.rank_nodes(bl_data[pid], metric)

            y5_values = np.array([y5_G.nodes[node][metric] for node in node_list])
            bl_values = np.array([bl_G.nodes[node][metric] for node in node_list])

            # Use the recorded interval between the two scans; fall back to a
            # nominal five years when it is missing.
            recorded_days = metadata.loc[pid]["Time Between BL and 5-Yr"]
            days_elapsed = default_days_elapsed if math.isnan(recorded_days) else recorded_days

            daily_change_per_subject.append((y5_values - bl_values)/days_elapsed)
            baseline_rank_per_subject.append(
                np.array([bl_G.nodes[node][metric + 'Rank'] for node in node_list])
            )

    print("Sample size N = ", len(daily_change_per_subject))
    mean_delta = np.mean(np.array(daily_change_per_subject), axis=0)
    mean_rank = np.mean(np.array(baseline_rank_per_subject), axis=0)
    return mean_delta, mean_rank

## Does the strength of regional controllability correlate with the amount of change over 5 years?
- Let x-axis be the mean controllability rank
- Let y-axis be the daily change over 5 years of that regional controllability value (averaged across all 5 year subjects)

Average Controllability changes

In [None]:
# Per-node mean daily change in average controllability and mean baseline
# rank, for the control ("HC") cohort and then the TIA ("P") cohort.
hc_delta_ctrb, hc_bl_rank = get_5_year_regional_ctrb(metric='avgCtrb', ptype="HC")
p_delta_ctrb, p_bl_rank = get_5_year_regional_ctrb(metric='avgCtrb', ptype="P")

In [None]:
# One point per node: mean baseline rank (x) against mean change in average
# controllability (y), controls as blue dots and TIA as orange triangles.
plt.scatter(hc_bl_rank, hc_delta_ctrb, color="blue", label="HC")
plt.scatter(p_bl_rank, p_delta_ctrb, color="orange", marker="^", label="TIA")
plt.xlabel("Mean Rank")
# Raw string: "\D" is not a valid escape sequence in a normal string literal
# (DeprecationWarning, SyntaxWarning from Python 3.12). The label text is unchanged.
plt.ylabel(r"Mean $\Delta$ Average Ctrb. over 5 years")
plt.legend()
plt.show()

Modal Controllability changes

In [None]:
# Per-node mean daily change in modal controllability and mean baseline rank,
# for the control ("HC") cohort and then the TIA ("P") cohort.
# Note: this reuses (overwrites) the variable names from the average-ctrb cell.
hc_delta_ctrb, hc_bl_rank = get_5_year_regional_ctrb(metric='modCtrb', ptype="HC")
p_delta_ctrb, p_bl_rank = get_5_year_regional_ctrb(metric='modCtrb', ptype="P")

In [None]:
# One point per node: mean baseline rank (x) against mean change in modal
# controllability (y), controls as blue dots and TIA as orange triangles.
plt.scatter(hc_bl_rank, hc_delta_ctrb, color="blue", label="HC")
plt.scatter(p_bl_rank, p_delta_ctrb, color="orange", marker="^", label="TIA")
plt.xlabel("Mean Rank")
# Raw string: "\D" is not a valid escape sequence in a normal string literal
# (DeprecationWarning, SyntaxWarning from Python 3.12). The label text is unchanged.
plt.ylabel(r"Mean $\Delta$ Modal Ctrb. over 5 years")
plt.legend()
plt.show()

#### Results
- Nodes with greater average controllability seem to change more over the 5 years. However, this change could be either an increase or a decrease.

# Longitudinal analysis: LME

- [X] (1) $y =$ whole-brain ctrb, $x =$ age, covariates = gender, education
- [ ] (2) $y =$ Cog Score, $x =$ age
- [ ] (3) $y =$ Cog Score, $x =$ whole-brain modal ctrb. We look at modal controllability because of a literature-review paper that found modal changes for these types of clinical cognitive tests, AND because from (1) we know that group is not a significant factor in predicting average controllability, whereas modal controllability does depend on group differences.

Export data for (1) to perform LME in R.

In [None]:
# Rebuild the whole-brain table from scratch and switch to the short,
# lower-case column names used for the LME export.
lme_column_names = dict([
    ("avgCtrb", "wbAvgCtrb"),
    ("modCtrb", "wbModalCtrb"),
    ("C/T", "group"),
    ("Age", "age"),
    ("Time", "time"),
    ("ID", "id"),
])
whole_brain_ctrb_df = get_ctrb_df(node_list, use_rank=False).rename(columns=lme_column_names)

How many TIA patients with both BL and Y5 data?

In [None]:
# Count TIA ("P") subjects that have BOTH a baseline and a 5-year row.
# Filtering on "HC" at "BL" alone would count baseline controls instead,
# which does not answer the question posed in the heading for this cell.
tia_rows = whole_brain_ctrb_df[whole_brain_ctrb_df["group"] == "P"]
tia_bl_ids = set(tia_rows.loc[tia_rows["time"] == "BL", "id"])
tia_y5_ids = set(tia_rows.loc[tia_rows["time"] == "Y5", "id"])
print(len(tia_bl_ids & tia_y5_ids))

In [None]:
# Numeric coding for gender; "X" is recorded as missing.
gender_map = {"M": 0, "F": 1, "X": math.nan}

# Look up each row's subject in the metadata table once, then pull both covariates.
subject_records = [metadata.loc[pid] for pid in whole_brain_ctrb_df["id"]]
gender = [gender_map[record["Gender"]] for record in subject_records]
education = [record["education_yrs"] for record in subject_records]

whole_brain_ctrb_df["education"] = education
whole_brain_ctrb_df["gender"] = gender

# Report the incomplete rows before discarding them.
print_nan_columns(whole_brain_ctrb_df)

whole_brain_ctrb_df = whole_brain_ctrb_df.dropna()

In [None]:
# Write the LME input table into the configured dump directory. The path is
# built from DUMP_PATH rather than repeated as a literal, so the export
# follows the config cell.
whole_brain_ctrb_df.to_csv(os.path.join(DUMP_PATH, 'wb_ctrb_aging.csv'))
# Spot-check the rows of one subject.
# NOTE(review): this compares against the string '118' -- confirm ids are stored as str.
whole_brain_ctrb_df[whole_brain_ctrb_df['id'] == '118']