In [8]:
import os
import pandas as pd
from statsmodels.stats.multitest import fdrcorrection

In [9]:
# Run configuration: pQTL type and phenotype code select the result folder and files.
pqtl_type = 'blood'
pheno = 'bag'
res_path = f'results_decode_{pheno}'

# Display label for the outcome column; any phenotype code not listed maps to 'BAG'.
outcome_labels = {'bag3': 'BAG > 3 years', 'bagm3': 'BAG < -3 years'}

# MR estimates, tagged with the outcome label and the data source.
df = pd.read_csv(os.path.join(res_path, f'mr_res_{pqtl_type}.csv')).assign(
    outcome=outcome_labels.get(pheno, 'BAG'),
    Source='deCODE',
)
# Pleiotropy test results for the same run (merged into df in a later cell).
df_plei = pd.read_csv(os.path.join(res_path, f'pleiotropy_res_{pqtl_type}.csv'))

In [10]:
# SeqId is the first two underscore-separated pieces of the exposure id,
# re-joined with an underscore.
df['SeqId'] = [
    parts[0] + '_' + parts[1]
    for parts in (exposure_id.split('_') for exposure_id in df['id.exposure'].tolist())
]

In [11]:
# Number of result rows per MR method.
method_counts = df['method'].value_counts()
method_counts

method
Inverse variance weighted    32
MR Egger                     31
Weighted median              31
Simple mode                  31
Weighted mode                31
Wald ratio                    2
Name: count, dtype: int64

In [12]:
# FDR correction (statsmodels `fdrcorrection`, default settings), applied
# separately to the p-values of each MR method.
fdr_res = []
for method in pd.unique(df['method']):
    print(method)
    dfp = df.loc[df['method'] == method, ['id.exposure', 'method', 'pval']].copy()
    # Only non-missing p-values take part in the correction. Rows whose p-value
    # is missing keep fdr = NaN instead of being passed to fdrcorrection, so one
    # failed estimate cannot affect the adjusted values of the other tests.
    has_pval = dfp['pval'].notna()
    dfp['fdr'] = float('nan')
    if has_pval.any():
        # fdrcorrection returns (reject flags, adjusted p-values); keep the latter.
        _, p_adj = fdrcorrection(dfp.loc[has_pval, 'pval'].to_numpy(), is_sorted=False)
        dfp.loc[has_pval, 'fdr'] = p_adj
    # One (id.exposure, method, fdr) table per method; concatenated in the next cell.
    fdr_res.append(dfp[['id.exposure', 'method', 'fdr']])

MR Egger
Weighted median
Inverse variance weighted
Simple mode
Weighted mode
Wald ratio


In [13]:
# Stack the per-method FDR tables row-wise into a single frame.
df_fdr_res = pd.concat(fdr_res)

In [14]:
# Attach the adjusted p-values to the MR results; rows are matched on
# exposure id and method (inner join).
fdr_keys = ['id.exposure', 'method']
df = df.merge(df_fdr_res, on=fdr_keys)

In [15]:
# Merge the pleiotropy test results into the MR table.
# 'pval' and 'se' are prefixed with 'egger_' so they do not clash with the MR
# columns of the same name; the outcome/exposure label columns are dropped.
df_plei = (
    df_plei
    .rename(columns={'pval': 'egger_pval', 'se': 'egger_se'})
    .drop(columns=['id.outcome', 'outcome', 'exposure'])
)
# Left join: every MR row is kept, with missing values where no test result exists.
df = df.merge(df_plei, on=['id.exposure'], how='left')

In [16]:
# Protein/gene annotation columns to carry into the result table.
annotation_cols = [
    'SeqId',
    'Protein (short name)',
    'Protein (full name)',
    'Gene',
    'UniProt',
    'Ensembl.Gene.ID',
]
df_drug_info = pd.read_csv(f'data/pqtls_deCODE_{pheno}.csv')[annotation_cols]
# Inner join on SeqId, annotation columns first.
df = df_drug_info.merge(df, on='SeqId')

In [17]:
# find significant results
# Bonferroni adjustment: raw p-value multiplied by the number of distinct
# exposures, capped at 1 so the adjusted value remains a valid probability.
n_exposures = len(pd.unique(df['id.exposure']))
df['BONF_P'] = (df['pval'] * n_exposures).clip(upper=1)
# 1/0 flag for rows whose FDR-adjusted p-value is below 0.05.
df['significant'] = (df['fdr'] < 0.05).astype(int)
# Export the full annotated result table.
df.to_csv(os.path.join(res_path, f'mr_res_{pqtl_type}_all.csv'), index=False)

In [18]:
# Significant results to report, drawn from rows with FDR-adjusted p < 0.05:
#   - any row with at most 2 SNPs, or
#   - Inverse variance weighted rows with more than 2 SNPs whose
#     pleiotropy test p-value is above 0.05.
fdr_hits = df[df['fdr'] < 0.05]
few_snps = fdr_hits['nsnp'] <= 2
ivw_without_pleiotropy = (
    (fdr_hits['nsnp'] > 2)
    & (fdr_hits['egger_pval'] > 0.05)
    & (fdr_hits['method'] == 'Inverse variance weighted')
)
df_ivw = fdr_hits[few_snps | ivw_without_pleiotropy]
df_ivw.to_csv(os.path.join(res_path, f'mr_significant_{pqtl_type}.csv'), index=False)

In [19]:
# count significant method for each druggable gene
# df_grouped = df.groupby(['id.exposure'])['significant'].sum()
# df_grouped = pd.DataFrame(df_grouped).reset_index()
# df_grouped.rename(columns={'significant': 'significant_num'}, inplace=True)

In [20]:
# df = pd.merge(df, df_grouped, on='id.exposure')

In [21]:
# select significant results: with <= 2 SNPs only 1 method is available and it must be significant; with >= 3 SNPs there are 5 methods and at least 3 of them must be significant
# df_significant = df[(((df['nsnp'] <= 2) & (df['significant_num'] == 1)) | ((df['nsnp'] > 2) & (df['significant_num'] > 2)))].copy()

In [22]:
# if the MR Egger test cannot be run, set its p-value to 1
# df_significant['egger_pval'].fillna(1, inplace=True)
# print(df_significant['egger_pval'])

In [23]:
# remove results with horizontal pleiotropy
# df_significant = df_significant[df_significant['egger_pval'] > 0.05]

In [24]:
# df_significant.to_csv(os.path.join(res_path, f'mr_significant_{pqtl_type}_5methods.csv') , index=False)