In [None]:
import os

import numpy as np
import pandas as pd
# chisq
from scipy.stats import chi2_contingency
# fdr correction
from statsmodels.stats.multitest import multipletests

In [None]:
# Read the subtype table from disk and show its column names
# as a quick check of what was loaded.
subtype_csv = 'data/data_subtype.csv'
df = pd.read_csv(subtype_csv)
df.columns

In [None]:
# Disease indicator columns of `df` that are tested against subtype membership.
# The order here fixes the row order of every results table built below.
diseases = [
    # neurological / psychiatric conditions
    'Alzheimers_disease', 'Anorexia_nervosa', 'Anxiety_disorder',
    'Bipolar_disorder', 'Depression_disorder', 'Epilepsy',
    'Multiple_sclerosis', 'Obsessive_compulsive_disorder', 'Parkinsons_disease',
    'Schizophrenia', 'Sleep_disorder', 'Stroke',
    # cardiovascular conditions
    'hypertension', 'heart_failure', 'ischemic_heart_disease', 'cardiac_arrhythmias',
    # kidney conditions
    'acute_kidney_failure', 'chronic_kidney_disease', 'glomerulus_nephritis',
    # other conditions ('hyperlipidemia' is currently disabled)
    'obesity',
    # 'hyperlipidemia',
    'retinopathy', 'asthma',
]

In [None]:
# Build one case/control frame per subtype: keep the control rows plus the rows
# of that subtype, then recode 'Subtype' as control -> 0 and subtype -> 1.
df_subtype1 = (
    df.loc[df['Subtype'].isin(['control', 'Subtype 1'])]
    .assign(Subtype=lambda frame: frame['Subtype'].map({'control': 0, 'Subtype 1': 1}))
)
df_subtype2 = (
    df.loc[df['Subtype'].isin(['control', 'Subtype 2'])]
    .assign(Subtype=lambda frame: frame['Subtype'].map({'control': 0, 'Subtype 2': 1}))
)

In [None]:
# Odds ratio and chi-square test for each disease: Subtype 1 vs. control.
# One row per entry of `diseases`; columns are
# [odds ratio, chi-square statistic, p-value, 95% CI lower, 95% CI upper].
res_subtype1 = np.zeros((len(diseases), 5))
for i, bd in enumerate(diseases):
    # Contingency table: rows = disease status, columns = group (0 = control, 1 = Subtype 1).
    # NOTE(review): the positional .iloc lookups below assume a 2x2 table whose rows
    # sort as "disease absent" then "disease present" (e.g. 0/1 coding) -- confirm.
    cont_table = pd.crosstab(df_subtype1[bd], df_subtype1['Subtype'])
    # Chi-square test of independence. SciPy applies Yates' continuity correction
    # by default when the table has one degree of freedom.
    chi2, p, dof, ex = chi2_contingency(cont_table)
    # Odds ratio = (present & subtype * absent & control) / (absent & subtype * present & control)
    odd_ratio = (cont_table.iloc[1, 1] * cont_table.iloc[0, 0]) / (cont_table.iloc[0, 1] * cont_table.iloc[1, 0])
    # 95% CI on the log-odds scale: log(OR) +/- 1.96 * sqrt(sum of reciprocal cell counts)
    ci_half_width = 1.96 * np.sqrt(1 / cont_table.iloc[0, 1] + 1 / cont_table.iloc[1, 0] + 1 / cont_table.iloc[0, 0] + 1 / cont_table.iloc[1, 1])
    ci_low = np.exp(np.log(odd_ratio) - ci_half_width)
    ci_high = np.exp(np.log(odd_ratio) + ci_half_width)
    # Store the exponentiated bounds. The half-width is a scalar, so indexing it
    # (as the previous version did) raised IndexError on the first iteration.
    res_subtype1[i, :] = [odd_ratio, chi2, p, ci_low, ci_high]

In [None]:
# Odds ratio and chi-square test for each disease: Subtype 2 vs. control.
# One row per entry of `diseases`; columns are
# [odds ratio, chi-square statistic, p-value, 95% CI lower, 95% CI upper].
res_subtype2 = np.zeros((len(diseases), 5))
for row, disease in enumerate(diseases):
    # contingency table: rows = disease status, columns = group (0 = control, 1 = Subtype 2)
    table = pd.crosstab(df_subtype2[disease], df_subtype2['Subtype'])
    # chi-square test of independence; only the statistic and p-value are kept
    chi2_stat, p_value, _dof, _expected = chi2_contingency(table)
    # the four cell counts, named by (row, column) position
    n00, n01 = table.iloc[0, 0], table.iloc[0, 1]
    n10, n11 = table.iloc[1, 0], table.iloc[1, 1]
    odds_ratio = (n11 * n00) / (n01 * n10)
    # 95% CI: exp(log(OR) -/+ 1.96 * standard error of the log odds ratio)
    log_or_se = np.sqrt(1 / n01 + 1 / n10 + 1 / n00 + 1 / n11)
    ci_lower, ci_upper = np.exp(np.log(odds_ratio) + np.array([-1.96, 1.96]) * log_or_se)
    res_subtype2[row, :] = [odds_ratio, chi2_stat, p_value, ci_lower, ci_upper]

In [None]:
# Turn the raw result arrays into labelled tables: name the statistic columns,
# tag each row with its subtype and disease, then put the identifying columns first.
stat_columns = ['Odd ratio', 'Chisq', 'p', '95lowerCI', '95higherCI']
ordered_columns = ['Subtype', 'Diseases', 'Odd ratio', '95lowerCI', '95higherCI', 'Chisq', 'p']

res_subtype1 = pd.DataFrame(res_subtype1, columns=stat_columns).assign(
    Subtype='Subtype 1', Diseases=diseases
)
res_subtype2 = pd.DataFrame(res_subtype2, columns=stat_columns).assign(
    Subtype='Subtype 2', Diseases=diseases
)

res_subtype1 = res_subtype1[ordered_columns]
res_subtype2 = res_subtype2[ordered_columns]

In [None]:
# FDR correction (Benjamini-Hochberg), applied separately to each subtype's set of
# per-disease p-values. `multipletests` comes from the notebook's top import cell;
# element [1] of its return value is the array of corrected p-values.
res_subtype1['P_FDR'] = multipletests(res_subtype1['p'], method='fdr_bh')[1]
res_subtype2['P_FDR'] = multipletests(res_subtype2['p'], method='fdr_bh')[1]

In [None]:
# combine the results
res = pd.concat([res_subtype1, res_subtype2])
res['Sig_Note'] = res['P_FDR'].apply(lambda x: '< 0.05' if x < 0.05 else 'NS' )
# save the results
os.makedirs('results/diseases', exist_ok=True)
res.to_csv('results/diseases/common_disease_subtype.csv', index=False)

In [None]:
# Show the column names of the combined results table.
res.columns

In [None]:
# Side-by-side comparison: one row per disease, with the Subtype 1 and Subtype 2
# statistics in adjacent columns. The confidence-interval columns are not kept.
res_s1 = res.loc[res['Subtype'] == 'Subtype 1'].drop(columns='Subtype')
res_s2 = res.loc[res['Subtype'] == 'Subtype 2'].drop(columns='Subtype')
res_comp = res_s1.merge(res_s2, on='Diseases', suffixes=('_subtype1', '_subtype2'))

comparison_columns = [
    'Diseases',
    'Odd ratio_subtype1', 'Odd ratio_subtype2',
    'Chisq_subtype1', 'Chisq_subtype2',
    'p_subtype1', 'p_subtype2',
    'P_FDR_subtype1', 'P_FDR_subtype2',
    'Sig_Note_subtype1', 'Sig_Note_subtype2',
]
res_comp = res_comp[comparison_columns]
res_comp.to_csv('results/diseases/common_disease_subtype_comp.csv', index=False)