In [11]:
import pandas as pd
import numpy as np
from statsmodels.stats.multitest import fdrcorrection

In [12]:
# Phenotype tag; it selects the results_<pheno>/ directory used for input and output.
pheno = 'bag'

# PheWAS result tables per eQTL source, followed by the matching pleiotropy tables.
df_blood = pd.read_csv(f'results_{pheno}/phwas_res_blood.csv')
df_brain = pd.read_csv(f'results_{pheno}/phwas_res_brain.csv')
df_plei_blood = pd.read_csv(f'results_{pheno}/pleiotropy_res_blood.csv')
df_plei_brain = pd.read_csv(f'results_{pheno}/pleiotropy_res_brain.csv')

# Number of distinct outcomes present in the blood table.
N = len(df_blood['id.outcome'].unique())
print(N)

# Tag every row with its eQTL source so the tables can be stacked later.
df_blood['eqtl_type'] = 'Blood eQTL'
df_brain['eqtl_type'] = 'Brain eQTL'

# Distinct exposures per table (blood first, then brain).
for mr_table in (df_blood, df_brain):
    print(len(mr_table['id.exposure'].unique()))

44
55
10


In [13]:
# Benjamini-Hochberg FDR correction of the blood p-values, applied separately
# within each MR method.
#
# The adjusted values are written straight back onto the rows they were computed
# from instead of being re-joined on (id.exposure, id.outcome, method): a key
# merge multiplies rows whenever a key triple repeats and produces fdr_x/fdr_y
# columns if this cell is executed a second time.
df_blood['fdr'] = np.nan
for method in pd.unique(df_blood['method']):
    print(method)
    # Rows of this method that carry a usable p-value. fdrcorrection() takes a
    # cumulative minimum over the sorted p-values, so one NaN p-value would turn
    # every adjusted value of the method into NaN; such rows keep fdr = NaN.
    testable = (df_blood['method'] == method) & df_blood['pval'].notna()
    if not testable.any():
        continue
    # fdrcorrection returns (reject, pvals_corrected); only the latter is kept.
    _, p_adj = fdrcorrection(df_blood.loc[testable, 'pval'].to_numpy(), is_sorted=False)
    df_blood.loc[testable, 'fdr'] = p_adj

# sanity check: number of distinct exposures is unchanged by the correction
print(len(pd.unique(df_blood['id.exposure'])))

MR Egger
Weighted median
Inverse variance weighted
Simple mode
Weighted mode
Wald ratio
55


In [14]:
# Benjamini-Hochberg FDR correction of the brain p-values, applied separately
# within each MR method.
#
# The adjusted values are written straight back onto the rows they were computed
# from instead of being re-joined on (id.exposure, id.outcome, method): a key
# merge multiplies rows whenever a key triple repeats and produces fdr_x/fdr_y
# columns if this cell is executed a second time.
df_brain['fdr'] = np.nan
for method in pd.unique(df_brain['method']):
    print(method)
    # Rows of this method that carry a usable p-value. fdrcorrection() takes a
    # cumulative minimum over the sorted p-values, so one NaN p-value would turn
    # every adjusted value of the method into NaN; such rows keep fdr = NaN.
    testable = (df_brain['method'] == method) & df_brain['pval'].notna()
    if not testable.any():
        continue
    # fdrcorrection returns (reject, pvals_corrected); only the latter is kept.
    _, p_adj = fdrcorrection(df_brain.loc[testable, 'pval'].to_numpy(), is_sorted=False)
    df_brain.loc[testable, 'fdr'] = p_adj

# sanity check: number of distinct exposures is unchanged by the correction
print(len(pd.unique(df_brain['id.exposure'])))

Inverse variance weighted
MR Egger
Weighted median
Simple mode
Weighted mode
Wald ratio
10


In [15]:
# Rows without an adjusted p-value (FDR = NaN) are treated as non-significant: FDR = 1.
# The filled column is assigned back explicitly. Calling fillna(inplace=True) on
# the df['fdr'] selection is chained assignment: it raises a FutureWarning on
# pandas 2.x and no longer updates the parent frame under Copy-on-Write.
df_blood['fdr'] = df_blood['fdr'].fillna(1)
df_brain['fdr'] = df_brain['fdr'].fillna(1)

# stack blood and brain results (row-wise)
df = pd.concat([df_blood, df_brain], axis=0)
df_plei = pd.concat([df_plei_blood, df_plei_brain], axis=0)

In [16]:
# Attach the MR-Egger pleiotropy test to every MR result row.
# The pleiotropy p-value / standard error get an egger_ prefix so they do not
# clash with the MR columns of the same name; the text label columns are dropped.
df_plei = (
    df_plei
    .rename(columns={'pval': 'egger_pval', 'se': 'egger_se'})
    .drop(columns=['outcome', 'exposure'])
)

# Left join: MR rows without a pleiotropy result are kept (egger columns = NaN).
# NOTE(review): df_plei stacks the blood and brain tables and the join key is
# only (id.exposure, id.outcome); if a pair occurs in both tables the MR rows
# would fan out here -- confirm the key is unique in df_plei.
df = df.merge(df_plei, on=['id.exposure', 'id.outcome'], how='left')

In [17]:
# Druggable-genome annotation, read from the 'Data' sheet of the workbook.
df_drug_info = pd.read_excel('data/druggable_genome.xlsx', sheet_name='Data')

# Inner join (the merge default): only exposures whose id matches an annotated
# ensembl_gene_id are kept, with the annotation columns placed first.
df = df_drug_info.merge(df, left_on='ensembl_gene_id', right_on='id.exposure')

In [18]:
# Replace the exposure id with the HGNC name and flag FDR-significant rows
# (1 when fdr < 0.05, else 0).
passes_fdr = df['fdr'].lt(0.05)
df = df.assign(**{
    'id.exposure': df['hgnc_names'],
    'significant': passes_fdr.astype(int),
})

# number of distinct exposures after relabelling
print(len(df['id.exposure'].unique()))

64


In [19]:
# Per (gene, outcome) pair: how many result rows are flagged significant.
df_grouped = (
    df.groupby(['ensembl_gene_id', 'id.outcome'])['significant']
    .sum()
    .reset_index(name='significant_num')
)

In [20]:
# Bring the per-(gene, outcome) count of significant rows back onto every row.
df = pd.merge(df, df_grouped, on=['ensembl_gene_id', 'id.outcome'])

# If the MR-Egger test could not be done, set its p-value to 1.
# The filled column is assigned back explicitly. Calling fillna(inplace=True) on
# the df['egger_pval'] selection is chained assignment: it raises a FutureWarning
# on pandas 2.x and no longer updates the parent frame under Copy-on-Write.
df['egger_pval'] = df['egger_pval'].fillna(1)

df.to_csv(f'results_{pheno}/mr_res_with_fdr.csv', index=False)
print(len(pd.unique(df['id.exposure'])))

64


In [21]:
# Selection rule, by number of SNP instruments:
#   nsnp <= 2 : exactly one significant row for the gene/outcome pair
#   nsnp >  2 : at least three significant rows for the gene/outcome pair
few_snps_hit = df['nsnp'].le(2) & df['significant_num'].eq(1)
many_snps_hit = df['nsnp'].gt(2) & df['significant_num'].gt(2)
df_significant = df.loc[few_snps_hit | many_snps_hit].copy()

In [22]:
# Keep only rows whose MR-Egger pleiotropy p-value is above 0.05
# (i.e. drop results showing horizontal pleiotropy).
no_pleiotropy = df_significant['egger_pval'] > 0.05
df_significant = df_significant.loc[no_pleiotropy]
print(df_significant.shape[0])

155


In [23]:
# Write the multi-method selection next to the other results for this phenotype.
significant_csv = f'results_{pheno}/mr_significant_5methods.csv'
df_significant.to_csv(significant_csv, index=False)

In [24]:
# FDR-significant rows, restricted to:
#   * any row with nsnp <= 2, or
#   * inverse-variance-weighted rows with nsnp > 2 and Egger p-value > 0.05
fdr_hits = df.loc[df['fdr'] < 0.05]
few_snps = fdr_hits['nsnp'] <= 2
ivw_no_pleiotropy = (
    (fdr_hits['nsnp'] > 2)
    & (fdr_hits['egger_pval'] > 0.05)
    & (fdr_hits['method'] == 'Inverse variance weighted')
)
df_ivw = fdr_hits.loc[few_snps | ivw_no_pleiotropy]
df_ivw.to_csv(f'results_{pheno}/mr_significant.csv', index=False)

In [25]:
# number of distinct exposures among the selected rows
n_exposures_ivw = len(df_ivw['id.exposure'].unique())
print(n_exposures_ivw)

58


In [26]:
# Significant eQTL tables, read in blood -> brain order.
# NOTE(review): the directory is fixed to 'eqtls_bag' while other paths follow
# `pheno`; confirm whether it should track the phenotype tag as well.
df_eqtls_blood, df_eqtls_brain = map(
    pd.read_csv,
    ('eqtls_bag/significant_blood_eqtls.csv', 'eqtls_bag/significant_brain_eqtls.csv'),
)

In [27]:
# distinct genes per eQTL table (blood first, then brain)
for eqtl_table in (df_eqtls_blood, df_eqtls_brain):
    print(len(eqtl_table['ensembl_gene_id'].unique()))

55
10


In [28]:
# Distinct gene ids: those present in the df_ivw selection, and those in each eQTL table.
genes_used = df_ivw['ensembl_gene_id'].unique()
genes_blood = df_eqtls_blood['ensembl_gene_id'].unique()
genes_brain = df_eqtls_brain['ensembl_gene_id'].unique()

In [29]:
# eQTL genes that do not appear in the df_ivw selection (sorted, unique ids)
genes_not_used_blood = np.setdiff1d(genes_blood, genes_used)
genes_not_used_brain = np.setdiff1d(genes_brain, genes_used)
for missing_genes in (genes_not_used_blood, genes_not_used_brain):
    print(missing_genes)

['ENSG00000006062' 'ENSG00000116985' 'ENSG00000167434' 'ENSG00000173786'
 'ENSG00000175894' 'ENSG00000196230']
[]


In [30]:
# all result rows for the blood eQTL genes missing from the df_ivw selection
in_unused_blood = df['ensembl_gene_id'].isin(genes_not_used_blood)
df_not_used = df.loc[in_unused_blood]