In [1]:
import os.path
import pandas as pd

In [2]:
df_blood = pd.read_table('data/blood_eqtls.txt', sep='\t')

In [3]:
len(pd.unique(df_blood['Gene']))

16987

In [4]:
df_brain = pd.read_table('data/brain_tissue_eqtls.txt', sep='\t')

In [5]:
df_brain['SNP_chr'] = df_brain['SNP_chr'].str[3:].astype(int)

In [6]:
len(pd.unique(df_brain['gene_id']))

32944

In [7]:
df_druggable = pd.read_excel(r'data/druggable_genome.xlsx', sheet_name='Data')
dis_thres = 250 * 1000

In [8]:
df_blood['distance'] = df_blood['SNPPos'] - df_blood['GenePos']

In [9]:
print('all eqtls in blood:', len(df_blood))
print(f'eqtls in {dis_thres // 1000}kb in blood:', len(df_blood[abs(df_blood['distance'] <= dis_thres)]))

all eqtls in blood: 10507664
eqtls in 250kb in blood: 8722652


In [10]:
print('all eqtls in brain tissues:', len(df_brain))
print(f'eqtls in {dis_thres // 1000}kb in brain tissues:', len(df_brain[abs(df_brain['SNP_distance_to_TSS'] <= dis_thres)]))

all eqtls in brain tissues: 2542908
eqtls in 250kb in brain tissues: 2199243


In [11]:
df_brain['ensembl_gene_id'] = df_brain['gene_id'].str[0:15]

In [12]:
df_brain_druggable = pd.merge(df_druggable, df_brain, on='ensembl_gene_id')

In [13]:
print('All Durggable genes in brain tissues:', len(pd.unique(df_brain_druggable['ensembl_gene_id'])))

All Durggable genes in brain tissues: 3447


In [14]:
df_blood_druggable = pd.merge(df_druggable, df_blood, left_on='ensembl_gene_id', right_on='Gene')

In [15]:
print('All Durggable genes in blood:', len(pd.unique(df_blood_druggable['ensembl_gene_id'])))

All Durggable genes in blood: 2715


In [16]:
# qc e-QTL distance
df_blood_druggable =  df_blood_druggable[abs(df_blood_druggable['distance']) <= dis_thres]
df_brain_druggable =  df_brain_druggable[abs(df_brain_druggable['SNP_distance_to_TSS']) <= dis_thres]

print(len(pd.unique(df_blood_druggable['ensembl_gene_id'])))
print(len(pd.unique(df_brain_druggable['ensembl_gene_id'])))

2682
2915


In [17]:
save_path = f'data_{dis_thres // 1000}kb_eqtls'
if not os.path.exists(save_path):
    os.mkdir(save_path)

df_blood_druggable.to_csv(f'{save_path}/druggable_blood_eqtls.csv', na_rep='NA', index=False)
df_brain_druggable.to_csv(f'{save_path}/druggable_brain_tissue_eqtls.csv', na_rep='NA', index=False)