In [None]:
import numpy as np
import pandas as pd
import os
import scipy.io as scio
import pickle

import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from scipy import stats
import pingouin as pg
import seaborn as sns
import base_functions as bf
import cluster_base_functions as cbf
from statsmodels.stats.multitest import multipletests 

import warnings
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
# plt.rcParams["font.family"] = "Arial"
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
%matplotlib inline

In [None]:
# Index into feat_names (and into cluster_labels_list further down) selecting
# the modality whose clustering is analysed.
care_feat = 2
feat_names = ['anat', 'act', 'conn']

# NOTE(review): pickle.load can execute arbitrary code; only load result files
# that were produced by this project.
# Context managers make sure both file handles are closed after loading.
with open('./results/cluster_multi_modality_2clusters_binary_patient.pkl', 'rb') as pkl_file:
    D = pickle.load(pkl_file)
savefile = f'./results/clinic_effect_patient_{feat_names[care_feat]}_kmeans.pdf'
cluster_labels_list = D['cluster_labels_list']
cluster_subjects = D['cluster_subjects']
cluster_vals_list = D['cluster_vals_list']

# D is rebound to the second pickle, as before, in case later cells read it.
with open('./results/combat_cluster_multi_modality_raw_data.pkl', 'rb') as pkl_file:
    D = pickle.load(pkl_file)
basic_DF = D['basic_DF']

In [None]:
# Column names requested from basic_DF, grouped by shared name prefix.
clinic_names = [
    'CAPSTOT_cur', 'CAPSB_cur', 'CAPSC_cur', 'CAPSD_cur', 'CAPSE_cur',
    'ETISR_Total',
    'sumETI_Gen_Trauma', 'sumETI_Phy_Abuse', 'sumETI_Emo_abuse', 'sumETI_Sex_Abuse',
    'BDI_total', 'ASI', 'STAXI1',
    'PANAS_PA', 'PANAS_NA',
    'WHOQOL_overall', 'WHOQOL_GeneralHealth', 'WHOQOL_physical',
    'WHOQOL_social', 'WHOQOL_environment',
]

cognitive_names = [
    'Motor_Coordination_Norm',
    'Processing_Speed_Norm',
    'Sustained_Attention_Norm',
    'Controlled_Attention_Norm',
    'Flexibility_Norm',
    'Inhibition_Norm',
    'Working_Memory_Norm',
    'Recall_Memory_Norm',
    'Executive_Function_Norm',
    'Identifying_Emotions_Norm',
    'Emotion_Bias_Norm',
]

biograph_names = ['Age', 'gender', 'edu_years']

# Clinical names first, cognitive names second.
clinic_cognitive_names = [*clinic_names, *cognitive_names]

In [None]:
# Query bf.get_subject_info (helper defined outside this notebook) once per
# column group, always with basic_DF and cluster_subjects; the call order is
# clinic, cognitive, group, biograph.
all_clinic, all_cognitive, all_group, all_biograph = (
    bf.get_subject_info(basic_DF, cluster_subjects, columns)
    for columns in (clinic_names, cognitive_names, ['new_group'], biograph_names)
)

In [None]:
def cohend(d1, d2):
    """Return Cohen's d for two independent samples.

    The difference of the sample means (d1 minus d2) is divided by the pooled
    standard deviation, which is built from the unbiased (ddof=1) sample
    variances weighted by their degrees of freedom.
    """
    size_1, size_2 = len(d1), len(d2)
    # Degrees-of-freedom-weighted sum of the two unbiased variances.
    weighted_var = (size_1 - 1) * np.var(d1, ddof=1) + (size_2 - 1) * np.var(d2, ddof=1)
    pooled_sd = np.sqrt(weighted_var / (size_1 + size_2 - 2))
    mean_gap = np.mean(d1) - np.mean(d2)
    return mean_gap / pooled_sd

In [None]:
# For every clinical/cognitive measure, compare the two clusters (labels 0 and 1)
# of the selected modality:
#   * an independent-samples t-test on the observed values -> meas_pvals
#   * a bootstrap distribution of Cohen's d (resampling within each cluster)
#     -> boot_meas_eff, summarised below as 2.5/97.5 percentiles and the mean.
care_meas = np.concatenate((all_clinic, all_cognitive), axis=1)
care_labels = cluster_labels_list[care_feat]
boot_num = 1000
boot_seed = 0  # fixed seed so the bootstrap intervals are reproducible across runs
rng = np.random.default_rng(boot_seed)
meas_num = care_meas.shape[1]
# NOTE(review): boot_meas_pvals is allocated but never filled in this cell; kept
# only in case another cell expects the name to exist.
boot_meas_pvals = np.zeros((meas_num, boot_num))
boot_meas_eff = np.zeros((meas_num, boot_num))
meas_pvals = np.zeros(meas_num)
for ival in range(meas_num):
    vals = care_meas[:, ival]

    # Drop subjects with a missing value for this measure (boolean indexing
    # already returns a copy, so care_meas is not modified).
    sidx = ~np.isnan(vals)
    vals = vals[sidx]
    labs = care_labels[sidx]

    a = vals[labs == 0]
    b = vals[labs == 1]
    tt = pg.ttest(a, b, correction=False)
    # .iloc[0] reads the single result row by position; an integer lookup on the
    # label-indexed result would rely on deprecated positional fallback in pandas.
    meas_pvals[ival] = tt['p-val'].iloc[0]
    for ibt in range(boot_num):
        # Resample each cluster with replacement, keeping its original size.
        aa = rng.choice(a, size=len(a), replace=True)
        bb = rng.choice(b, size=len(b), replace=True)
        boot_meas_eff[ival, ibt] = cohend(aa, bb)

# Percentile bootstrap interval and bootstrap mean of Cohen's d per measure.
low = np.percentile(boot_meas_eff, 2.5, axis=1)
high = np.percentile(boot_meas_eff, 97.5, axis=1)
mval = np.mean(boot_meas_eff, axis=1)

In [None]:
# Benjamini-Hochberg FDR correction across all measures. Only the corrected
# p-values (second element of the returned tuple) are kept; indexing the tuple
# avoids rebinding `_`, which IPython uses for the last cell output.
pfdr = multipletests(meas_pvals, method='fdr_bh')[1]

# Show the measures surviving FDR at 0.05 together with their names.
# Assumes meas_pvals follows the order of clinic_cognitive_names (one column per
# requested name, clinical first, then cognitive) -- TODO confirm against
# bf.get_subject_info.
sig_mask = pfdr < 0.05
pd.Series(pfdr[sig_mask], index=np.asarray(clinic_cognitive_names)[sig_mask], name='p_fdr')

In [None]:
# Forest plot of the bootstrap Cohen's d (mean with 2.5-97.5 percentile interval)
# for every measure, one row per measure, first measure at the top.
colors = sns.color_palette('deep', as_cmap=True)
abbre_names = ['CAPS', 'CAPS_B', 'CAPS_C', 'CAPS_D', 'CAPS_E', 'ETISR', 'ETI_Gen_Trauma', 'ETI_Phy_Abuse', 'ETI_Emo_abuse', 'ETI_Sex_Abuse', 
               'BDI', 'ASI', 'STAXI', 'PANAS_PA', 'PANAS_NA', 'WHOQOL', 'WHOQOL_GeneralHealth', 'WHOQOL_physical', 'WHOQOL_social', 'WHOQOL_environment',
               'Motor_Coordination', 'Processing_Speed', 'Sustained_Attention', 'Controlled_Attention','Flexibility','Inhibition',
               'Working_Memory','Recall_Memory','Executive_Function', 'Identifying_Emotions','Emotion_Bias']

n_meas = len(mval)
# care_meas was built as (all_clinic, all_cognitive), so the first
# all_clinic-many columns are the clinical measures; derive the split instead
# of hard-coding it.
n_clinic = np.shape(all_clinic)[1]

fig, ax = plt.subplots(figsize=(6,8))
y = np.arange(n_meas)[::-1]  # reversed so the first measure is drawn at the top
x = mval
# Asymmetric error bars: distance from the mean down to `low` and up to `high`.
xerr = np.stack([mval - low, high - mval], axis=0)

marker_style = dict(linestyle='None', linewidth=1, marker='.', markersize=10)
# Clinical measures in one colour, cognitive measures in another.
ax.errorbar(x[:n_clinic], y[:n_clinic], xerr=xerr[:, :n_clinic], color=colors[0], **marker_style)
ax.errorbar(x[n_clinic:], y[n_clinic:], xerr=xerr[:, n_clinic:], color=colors[2], **marker_style)
ax.set_yticks(y);
ax.set_yticklabels(abbre_names);
# Reference line at zero effect, spanning the full height of the axes.
ax.axvline(0, linestyle='--', color='gray')
ax.set_ylim([-1, n_meas])
ax.set_xlim([-0.6,0.6])
ax.set_xticks(np.arange(-0.6, 0.61,0.2));
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.set_xlabel('Cohen\'s d', fontsize=10);
# Uncomment to write the figure to `savefile`:
# fig.savefig(savefile, bbox_inches="tight")