In [263]:
import pandas as pd
from statsmodels.stats.multitest import fdrcorrection

In [264]:
# Run configuration: these three values select the input and output file
# paths used throughout the notebook.
eqtl_type, dis_thres, pheno = 'brain', 250, 'bagm3'

# MR results table for the selected configuration.
df = pd.read_csv(
    f'results_{dis_thres}kb_eqtls_{pheno}/mr_res_{eqtl_type}.csv'
)
# Matching pleiotropy results; merged into `df` by 'id.exposure' later on.
df_plei = pd.read_csv(
    f'results_{dis_thres}kb_eqtls_{pheno}/pleiotropy_res_{eqtl_type}.csv'
)

In [265]:
# Display label for the eQTL source: 'blood' gets the blood label, every
# other eqtl_type value falls back to the brain-tissue label.
eqtl_labels = {'blood': 'Blood eQTL'}
df['eQTL_type'] = eqtl_labels.get(eqtl_type, 'Brain Tissue eQTL')

# Display label for the outcome: 'bag3' is the "> 3 years" group, every
# other pheno value is labelled as the "< -3 years" group.
if pheno == 'bag3':
    df['outcome'] = 'BAG > 3 years'
else:
    df['outcome'] = 'BAG < -3 years'

# Cell output: number of result rows per MR method.
df['method'].value_counts()

Inverse variance weighted    1578
MR Egger                     1017
Weighted median              1017
Simple mode                  1017
Weighted mode                1017
Wald ratio                    960
Name: method, dtype: int64

In [266]:
# FDR correction, run separately on the p-values of each MR method.
# Each iteration appends one frame (id.exposure, method, fdr) to `fdr_res`.
fdr_res = []
for method in df['method'].unique():
    print(method)
    method_rows = df.loc[df['method'] == method, ['id.exposure', 'method', 'pval']].copy()
    # fdrcorrection returns a pair; element [1] is stored as the adjusted p-value.
    correction = fdrcorrection(method_rows['pval'].to_numpy(), is_sorted=False)
    method_rows['fdr'] = correction[1]
    fdr_res.append(method_rows[['id.exposure', 'method', 'fdr']])

Inverse variance weighted
MR Egger
Weighted median
Simple mode
Weighted mode
Wald ratio


In [267]:
# Stack the per-method FDR frames row-wise into a single table.
df_fdr_res = pd.concat(fdr_res)

In [268]:
# Attach the 'fdr' column to the MR results, matching rows on the
# (id.exposure, method) pair.
df = df.merge(df_fdr_res, on=['id.exposure', 'method'])

In [269]:
# Prepare the pleiotropy table for merging: prefix its 'se' and 'pval'
# columns with 'egger_' and drop the descriptive columns that are not
# needed for the join.
df_plei = (
    df_plei
    .rename(columns={'se': 'egger_se', 'pval': 'egger_pval'})
    .drop(columns=['id.outcome', 'outcome', 'exposure'])
)
# Left join: every MR row is kept; exposures without a pleiotropy row get NaN.
df = df.merge(df_plei, how='left', on=['id.exposure'])

In [270]:
# Read the 'Data' sheet of the druggable-genome workbook and join it to the
# MR results: its 'ensembl_gene_id' is matched against the MR 'id.exposure'.
# The drug-info frame is the left side of the merge.
df_drug_info = pd.read_excel('data/druggable_genome.xlsx', sheet_name='Data')
df = df_drug_info.merge(df, left_on='ensembl_gene_id', right_on='id.exposure')

In [271]:
# Replace the exposure identifier with the value of the 'hgnc_names' column.
df['id.exposure'] = df['hgnc_names']
# 1 when the FDR-adjusted p-value is below 0.05, otherwise 0.
df['significant'] = df['fdr'].lt(0.05).astype(int)

In [272]:
# count the number of significant methods for each druggable gene
# df_grouped = df.groupby(['ensembl_gene_id'])['significant'].sum()
# df_grouped = pd.DataFrame(df_grouped).reset_index()
# df_grouped.rename(columns={'significant': 'significant_num'}, inplace=True)

In [273]:
# df = pd.merge(df, df_grouped, on='ensembl_gene_id')
# Rows with no pleiotropy result (NaN after the left merge) get an Egger
# p-value of 1. The filled column is assigned back to `df` rather than
# calling fillna(..., inplace=True) on the column selection: that chained
# in-place form is deprecated and leaves `df` unchanged under pandas
# copy-on-write.
df['egger_pval'] = df['egger_pval'].fillna(1)
# Save the full annotated table for this configuration.
df.to_csv(f'results_{dis_thres}kb_eqtls_{pheno}/mr_result_{eqtl_type}_preprocessed.csv', index=False)

In [274]:
# select significant results: with <=2 SNPs only 1 method is available and it must be significant; with >=3 SNPs there are 5 methods and at least 3 of them must be significant
# df_significant = df[(((df['nsnp'] <= 2) & (df['significant_num'] == 1)) | ((df['nsnp'] > 2) & (df['significant_num'] > 2)))].copy()

In [275]:
# if the MR Egger test could not be run, set its p-value to 1
# df_significant['egger_pval'].fillna(1, inplace=True)
# print(df_significant['egger_pval'])

In [276]:
# remove results with horizontal pleiotropy
# df_significant = df_significant[df_significant['egger_pval'] > 0.05]

In [277]:
# df_significant.to_csv(f'results_{dis_thres}kb_eqtls_{pheno}/mr_significant_{eqtl_type}_5methods.csv', index=False)


In [278]:
# Keep FDR-significant rows that are either
#   * estimated from at most 2 SNPs, or
#   * inverse-variance-weighted estimates from more than 2 SNPs whose Egger
#     p-value is above 0.05.
fdr_hits = df[df['fdr'] < 0.05]
few_snps = fdr_hits['nsnp'] <= 2
ivw_egger_ok = (
    (fdr_hits['nsnp'] > 2)
    & (fdr_hits['egger_pval'] > 0.05)
    & (fdr_hits['method'] == 'Inverse variance weighted')
)
df_ivw = fdr_hits[few_snps | ivw_egger_ok]
df_ivw.to_csv(f'results_{dis_thres}kb_eqtls_{pheno}/mr_significant_{eqtl_type}.csv', index=False)

In [279]:
# Colocalization results for this configuration; only the gene id, the SNP
# and the 'SNP.PP.H4' column are kept.
coloc_columns = ['ensembl_gene_id', 'snp', 'SNP.PP.H4']
df_coloc = pd.read_csv(f'coloc_results/coloc_{eqtl_type}_{pheno}.csv')[coloc_columns]

In [280]:
# Inner join: keep only significant MR rows whose gene also has coloc results.
df_ivw_coloc = df_ivw.merge(df_coloc, how='inner', on='ensembl_gene_id')

In [281]:
# Write the MR + colocalization table next to the other results.
coloc_out_path = f'results_{dis_thres}kb_eqtls_{pheno}/mr_significant_{eqtl_type}_coloc.csv'
df_ivw_coloc.to_csv(coloc_out_path, index=False)