# Survival analysis between T2D subtypes and diseases

In [None]:
import os
import pandas as pd
import numpy as np
from lifelines import KaplanMeierFitter, CoxPHFitter

In [None]:
# Outcomes analysed in this notebook. Each name is used both in the input
# file name (`survival_data_<name>.csv`) and as the event column of that file.
diseases = [
    'Alzheimers_disease',
    'Anorexia_nervosa',
    'Anxiety_disorder',
    'Bipolar_disorder',
    'Depression_disorder',
    'Epilepsy',
    'Multiple_sclerosis',
    'Obsessive_compulsive_disorder',
    'Parkinsons_disease',
    'Schizophrenia',
    'Sleep_disorder',
    'Stroke',
    # Further outcomes, currently disabled:
    # 'hypertension',
    # 'heart_failure',
    # 'ischemic_heart_disease',
    # 'cardiac_arrhythmias',
    # 'acute_kidney_failure',
    # 'chronic_kidney_disease',
    # 'glomerulus_nephritis',
    # 'obesity',
    # 'retinopathy',
    # 'asthma',
]

# Labels of the two T2D subtypes.
subtypes = ['subtype1', 'subtype2']

# When True, inputs are read from the `*_control` data folders and results
# are written to the control results folder.
control = False

In [None]:
# Cox proportional-hazards model for each disease, subtype 1.
# For every disease the 'Subtype' row of `cph.summary` is stored, one row
# per disease in the same order as `diseases`.
# The array is 11 columns wide, so `cph.summary` is expected to have 11
# columns; a different width makes the row assignment below fail.
res_subtype1 = np.zeros((len(diseases), 11))
# The input folder depends only on `control`, so resolve it once.
data_dir_s1 = 'data/subtype1_control' if control else 'data/subtype1'
# enumerate gives the row position directly; looking it up with
# diseases.index(dis) would hit the wrong row if a name were ever repeated.
for row, dis in enumerate(diseases):
    df = pd.read_csv(f'{data_dir_s1}/survival_data_{dis}.csv')
    # drop Eid and Stage so that they are not passed to the model
    df = df.drop(columns=['Eid', 'Stage'])
    cph = CoxPHFitter()
    cph.fit(df, duration_col='time', event_col=dis)
    # keep the full summary row of the 'Subtype' covariate
    res_subtype1[row, :] = cph.summary.loc['Subtype'].values

In [None]:
# Cox proportional-hazards model for each disease, subtype 2.
# For every disease the 'Subtype' row of `cph.summary` is stored, one row
# per disease in the same order as `diseases`.
# The array is 11 columns wide, so `cph.summary` is expected to have 11
# columns; a different width makes the row assignment below fail.
res_subtype2 = np.zeros((len(diseases), 11))
# The input folder depends only on `control`, so resolve it once.
data_dir_s2 = 'data/subtype2_control' if control else 'data/subtype2'
# enumerate gives the row position directly; looking it up with
# diseases.index(dis) would hit the wrong row if a name were ever repeated.
for row, dis in enumerate(diseases):
    df = pd.read_csv(f'{data_dir_s2}/survival_data_{dis}.csv')
    # drop Eid and Stage so that they are not passed to the model
    df = df.drop(columns=['Eid', 'Stage'])
    cph = CoxPHFitter()
    cph.fit(df, duration_col='time', event_col=dis)
    # keep the full summary row of the 'Subtype' covariate
    res_subtype2[row, :] = cph.summary.loc['Subtype'].values

In [None]:
# Column names of the summary table of the most recently fitted model
# (`cph`); they are used below to label the columns of the result arrays.
cols = list(cph.summary.columns)
print(cols)

In [None]:
# Convert the raw result arrays into labelled tables.
# Each table gets a 'Subtype' label and the disease names, placed in front
# of the summary columns.
id_cols = ['Subtype', 'Diseases']

res_subtype1 = (
    pd.DataFrame(res_subtype1, columns=cols)
    .assign(Subtype='Subtype 1', Diseases=diseases)
    .loc[:, id_cols + cols]
)

res_subtype2 = (
    pd.DataFrame(res_subtype2, columns=cols)
    .assign(Subtype='Subtype 2', Diseases=diseases)
    .loc[:, id_cols + cols]
)

In [None]:
# Benjamini-Hochberg FDR correction, applied to each subtype's p-values
# separately (one correction per table, across all diseases).
from statsmodels.stats.multitest import multipletests

for subtype_res in (res_subtype1, res_subtype2):
    # element [1] of the multipletests result holds the corrected p-values
    subtype_res['P_FDR'] = multipletests(subtype_res['p'], method='fdr_bh')[1]

In [None]:
# Stack the two subtype tables and replace the summary column names with
# beta / HR labels.
column_labels = {
    'coef': 'beta',
    'exp(coef)': 'HR',
    'se(coef)': 'se (beta)',
    'coef lower 95%': 'beta lower 95% CI',
    'coef upper 95%': 'beta upper 95% CI',
    'exp(coef) lower 95%': 'HR lower 95% CI',
    'exp(coef) upper 95%': 'HR upper 95% CI',
}
res = pd.concat([res_subtype1, res_subtype2], axis=0).rename(columns=column_labels)

# Mark rows whose FDR-adjusted p-value is below 0.05; all others are 'NS'.
fdr_significant = res['P_FDR'] < 0.05
res['Sig_Note'] = np.where(fdr_significant, 'p.adj < 0.05', 'NS')

In [None]:
# Output folder: control runs go to their own directory.
res_path = 'results/SA_diseases_control' if control else 'results/SA_diseases'
# Create the folder if needed; an existing folder is not an error.
os.makedirs(res_path, exist_ok=True)
# res.to_csv(os.path.join(res_path, 'coxph_diseases.csv'), index=False)

In [None]:
# Side-by-side comparison of the two subtypes, one row per disease.
# Split the combined table by subtype; the 'Subtype' column is no longer
# needed once the rows are separated.
res_s1 = res.loc[res['Subtype'] == 'Subtype 1'].drop(columns='Subtype')
res_s2 = res.loc[res['Subtype'] == 'Subtype 2'].drop(columns='Subtype')

# Columns kept in the comparison file: HR, adjusted p-value and
# significance note for each subtype.
comp_cols = [
    'Diseases',
    'HR_subtype1', 'HR_subtype2',
    'P_FDR_subtype1', 'P_FDR_subtype2',
    'Sig_Note_subtype1', 'Sig_Note_subtype2',
]
# Join on the disease name; the suffixes tell the two subtypes' columns apart.
res_comp = res_s1.merge(res_s2, on='Diseases', suffixes=('_subtype1', '_subtype2'))
res_comp = res_comp[comp_cols]

comp_file = os.path.join(res_path, 'coxph_diseases_subtype_comp.csv')
res_comp.to_csv(comp_file, index=False)

In [None]:
# Lookup table of disease names; its 'pheno' and 'pheno_abv' columns are
# merged into the result tables below.
disease_names_file = 'data/disease_gwas_ids.csv'
df_dis_name = pd.read_csv(disease_names_file)

In [None]:
# Add the 'pheno' / 'pheno_abv' columns to the results, matching
# res['Diseases'] against 'pheno'. A left join keeps every result row even
# if a disease has no entry in the lookup table.
# NOTE(review): 'pheno_abv' is presumably the abbreviated disease name - confirm.
res = res.merge(
    df_dis_name[['pheno', 'pheno_abv']],
    how='left',
    left_on='Diseases',
    right_on='pheno',
)

In [None]:
# Write the combined per-subtype results (without the index) to the results folder.
coxph_file = os.path.join(res_path, 'coxph_diseases.csv')
res.to_csv(coxph_file, index=False)

In [None]:
# Head-to-head Cox model of subtype 2 against subtype 1 (subtype 1 is the
# reference group). One 'Subtype' summary row is stored per disease, in the
# same order as `diseases`.
# The array is 11 columns wide, so `cph.summary` is expected to have 11
# columns; a different width makes the row assignment below fail.
res_subtype_comp = np.zeros((len(diseases), 11))
# enumerate gives the row position directly; looking it up with
# diseases.index(dis) would hit the wrong row if a name were ever repeated.
for row, dis in enumerate(diseases):
    df1 = pd.read_csv(f'data/subtype1/survival_data_{dis}.csv')
    df2 = pd.read_csv(f'data/subtype2/survival_data_{dis}.csv')
    # remove controls: keep only the rows with Subtype == 1 in each file.
    # Subtype 1 members are then recoded to 0 so that they act as the
    # reference. assign() returns a new frame, which avoids writing into a
    # filtered slice of the original (SettingWithCopyWarning).
    df1 = df1[df1['Subtype'] == 1].assign(Subtype=0)
    df2 = df2[df2['Subtype'] == 1]
    df = pd.concat([df1, df2], axis=0)
    # drop Eid and Stage so that they are not passed to the model
    df = df.drop(columns=['Eid', 'Stage'])
    cph = CoxPHFitter()
    cph.fit(df, duration_col='time', event_col=dis)
    # keep the full summary row of the 'Subtype' covariate
    res_subtype_comp[row, :] = cph.summary.loc['Subtype'].values

In [None]:
# Turn the subtype 2 vs subtype 1 results into a labelled table and save it.
res_subtype_comp = pd.DataFrame(res_subtype_comp, columns=cols)
# Disease names go in the first column, followed by the summary columns.
# No 'Subtype' label column is added: the output only ever contained
# 'Diseases' plus the summary columns, and the comparison is identified by
# the output file name.
res_subtype_comp.insert(0, 'Diseases', diseases)
# Benjamini-Hochberg correction across all diseases; element [1] of the
# multipletests result holds the corrected p-values.
res_subtype_comp['P_FDR'] = multipletests(res_subtype_comp['p'], method='fdr_bh')[1]
# Replace the summary column names with beta / HR labels.
res_subtype_comp = res_subtype_comp.rename(columns={
    'coef': 'beta',
    'exp(coef)': 'HR',
    'se(coef)': 'se (beta)',
    'coef lower 95%': 'beta lower 95% CI',
    'coef upper 95%': 'beta upper 95% CI',
    'exp(coef) lower 95%': 'HR lower 95% CI',
    'exp(coef) upper 95%': 'HR upper 95% CI',
})
# Add the 'pheno' / 'pheno_abv' columns; the left join keeps every disease row.
res_subtype_comp = pd.merge(
    res_subtype_comp,
    df_dis_name[['pheno', 'pheno_abv']],
    left_on='Diseases',
    right_on='pheno',
    how='left',
)
res_subtype_comp.to_csv(os.path.join(res_path, 'coxph_diseases_subtype1vs2.csv'), index=False)