In [44]:
import os
import pandas as pd
from statsmodels.stats.multitest import fdrcorrection

In [45]:
# --- Run configuration ---
pqtl_type = 'blood'   # tag embedded in the input/output file names
study = 'interval'
dis_thres = 250       # not referenced by any of the cells shown below
pheno = 'bag3'

# Inputs are read from (and outputs written to) one folder per study/phenotype.
res_path = f'results_{study}_{pheno}'

# Main MR result table and the matching pleiotropy result table.
mr_res_file = os.path.join(res_path, f'mr_res_{pqtl_type}.csv')
df = pd.read_csv(mr_res_file)
plei_res_file = os.path.join(res_path, f'pleiotropy_res_{pqtl_type}.csv')
df_plei = pd.read_csv(plei_res_file)

# Constant label columns: every row gets the same outcome text and source.
if pheno == 'bag3':
    outcome_label = 'BAG > 3 years'
else:
    outcome_label = 'BAG < -3 years'
df['outcome'] = outcome_label
df['Source'] = 'INTERVAL'

In [46]:
# Number of result rows per MR method (quick check of what the input contains).
df['method'].value_counts()

Inverse variance weighted    10
MR Egger                      9
Weighted median               9
Simple mode                   9
Weighted mode                 9
Wald ratio                    3
Name: method, dtype: int64

In [47]:
# FDR correction, applied separately within each MR method.
# Result: one small table per method with columns id.exposure / method / fdr.
fdr_res = []
for method in pd.unique(df['method']):
    print(method)
    dfp = df.loc[df['method'] == method, ['id.exposure', 'method', 'pval']].copy()
    # fdrcorrection() has no NaN handling: a single missing p-value turns every
    # adjusted value of the group into NaN. Correct only the non-missing
    # p-values and leave NaN for rows that have no p-value.
    has_pval = dfp['pval'].notna()
    dfp['fdr'] = float('nan')
    if has_pval.any():
        # fdrcorrection returns (reject flags, adjusted p-values); keep the latter
        p_adj = fdrcorrection(dfp.loc[has_pval, 'pval'].to_numpy(), is_sorted=False)
        dfp.loc[has_pval, 'fdr'] = p_adj[1]
    fdr_res.append(dfp[['id.exposure', 'method', 'fdr']])

MR Egger
Weighted median
Inverse variance weighted
Simple mode
Weighted mode
Wald ratio


In [48]:
# Stack the per-method FDR tables row-wise into one long table
# (row-wise is pd.concat's default; the original row labels are kept).
df_fdr_res = pd.concat(fdr_res)

In [49]:
# Attach the FDR-adjusted p-values to the full MR result table.
# Inner join on the (exposure, method) pair.
# NOTE(review): this assumes each (id.exposure, method) pair occurs once in df;
# repeated pairs would be multiplied by the join - confirm against the input.
df = df.merge(df_fdr_res, how='inner', on=['id.exposure', 'method'])

In [50]:
# Merge the MR-Egger pleiotropy test result into the MR table.
# 'pval' and 'se' are renamed first so they do not clash with the MR columns,
# and the outcome/exposure label columns are dropped before joining.
df_plei = (
    df_plei
    .rename(columns={'pval': 'egger_pval', 'se': 'egger_se'})
    .drop(columns=['id.outcome', 'outcome', 'exposure'])
)
# Left join: MR rows without a pleiotropy result are kept with NaN egger_* values.
df = df.merge(df_plei, how='left', on='id.exposure')

In [51]:
# Load the protein / drug-target annotation and attach it to the MR results.
drug_info_cols = ['SOMAMER_ID', 'UniProt', 'TargetFullName', 'Target',
                  'ensembl_gene_id', 'hgnc_names', 'druggability_tier']
df_drug_info = pd.read_csv(f'data/pqtls_interval_{pheno}.csv')
# Keep each distinct annotation row once. If the pQTL file carries several rows
# per SOMAMER_ID (presumably one per instrument - confirm against the file),
# joining without this step would repeat every MR row that many times.
df_drug_info = df_drug_info[drug_info_cols].drop_duplicates()
# Inner join: MR rows whose exposure id has no annotation row are dropped.
df = pd.merge(df_drug_info, df, left_on='SOMAMER_ID', right_on='id.exposure')
# From here on the exposure is identified by its HGNC gene name(s).
df['id.exposure'] = df['hgnc_names']

In [52]:
# Flag significant results: 1 when the FDR-adjusted p-value is below 0.05,
# otherwise 0 (a missing FDR value compares as False and is flagged 0).
is_significant = df['fdr'].lt(0.05)
df['significant'] = is_significant.astype(int)

# Save the complete annotated table.
# NOTE(review): this file name does not include pqtl_type, unlike the other
# outputs of this notebook - confirm that is intended.
complete_path = os.path.join(res_path, 'mr_res_complete.csv')
df.to_csv(complete_path, index=False)

In [53]:
# Significant results, IVW or Wald ratio only.
# Step 1: keep rows that pass the FDR threshold.
fdr_hits = df[df['fdr'] < 0.05]

# Step 2: keep a row when either
#   - it is based on at most 2 SNPs (kept whatever its method), or
#   - it is an IVW estimate on more than 2 SNPs whose Egger p-value is above
#     0.05. A missing egger_pval fails this comparison, so such rows are dropped.
few_snps = fdr_hits['nsnp'] <= 2
ivw_without_pleiotropy = (
    (fdr_hits['nsnp'] > 2)
    & (fdr_hits['egger_pval'] > 0.05)
    & (fdr_hits['method'] == 'Inverse variance weighted')
)
df_ivw = fdr_hits[few_snps | ivw_without_pleiotropy]

significant_path = os.path.join(res_path, f'mr_significant_{pqtl_type}.csv')
df_ivw.to_csv(significant_path, index=False)

In [54]:
# count the number of significant methods for each druggable gene
# df_grouped = df.groupby(['ensembl_gene_id'])['significant'].sum()
# df_grouped = pd.DataFrame(df_grouped).reset_index()
# df_grouped.rename(columns={'significant': 'significant_num'}, inplace=True)

In [55]:
# df = pd.merge(df, df_grouped, on='ensembl_gene_id')

In [56]:
# select significant results: with <=2 SNPs there is only 1 method; with >=3 SNPs there are 5 methods, at least 3 of which must be significant
# df_significant = df[(((df['nsnp'] <= 2) & (df['significant_num'] == 1)) | ((df['nsnp'] > 2) & (df['significant_num'] > 2)))].copy()

In [57]:
# if MR Egger cannot be done, set its p-value to 1
# df_significant['egger_pval'].fillna(1, inplace=True)
# print(df_significant['egger_pval'])

In [58]:
# remove results with horizontal pleiotropy
# df_significant = df_significant[df_significant['egger_pval'] > 0.05]

In [59]:
# df_significant.to_csv(os.path.join(res_path, f'mr_significant_{pqtl_type}_5methods.csv') , index=False)