In [1]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import subprocess
import logging
from ImmuneGWAS.helpers.getpaths import get_paths, get_sumstats_path
from ImmuneGWAS.helpers import dbsnp, ldlink
import ImmuneGWAS.resources.immune_GWAS as immune_GWAS
from ImmuneGWAS import config
from ImmuneGWAS.variant import Variant

from ImmuneGWAS.resources.eqtlgen import eqtlgen_trans_LDblock_query as lookup_trans
from ImmuneGWAS.resources.eqtlgen import eqtlgen_cis_LDblock_query as lookup_cis
from ImmuneGWAS.resources.tokyo_eqtl import tokyo_eqtl_LDblock_query as lookup_tokyo
from ImmuneGWAS.resources.eqtl_cat import *


Designing the lookup:

1) Variant object calls trans_df lookup
2) Harmonize according to EA/OA, raise exceptions for multi-allelic SNPs
3) Split trans_df into up and down
4) Return row-scaled heatmap df where cell ordering is hard-coded and rows are resorted for clustering
5) Plotting function for heatmap df
6) Get flow phenotypes
7) Get LDTrait phenotypes
8) Summarize phenotpyes into tables with betas (remove duplicate phenotypes etc)
9) Get a single summary cis-eQTL table
10) Return summary report - 2 heatmaps for up and down, cis-eQTLs, phenotypes summarized

In [2]:
rsid = "rs1354034"
chrom = 3
pos = 56815721
EA = 'T'
OA = 'C'

x = Variant(rsid, chrom, pos, EA, OA)

In [3]:
# Get trans DF

trans = lookup_trans(x)
trans.Zscore = trans.Zscore.astype(float)
    

# check EA for sign of beta
if set(trans.AssessedAllele == x.EA) == {True}:
    trans.Zscore = trans.Zscore * -1

up = trans[trans.Zscore>0]
down = trans[trans.Zscore<0]

up = up.Gene
down = down.Gene

import seaborn as sns
import matplotlib.pyplot as plt

# fig, ax = plt.subplots()

mapper = dict(zip(trans.Gene, trans.GeneSymbol))

paths = get_paths(root='/media/')

ge = pd.read_csv(paths['ge_tokyo'], sep='\t')

ge = ge.set_index("Gene_id")

ge = ge.drop("Gene_name", axis=1)


ge = ge.drop_duplicates()

u = [i for i in up if i in ge.index]
u = set(u)
d = [i for i in down if i in ge.index]
d = set(d)
u = ge.loc[u]
d = ge.loc[d]

u.index = u.index.map(mapper)
d.index = d.index.map(mapper)

ordered_cols = ['Plasmablast', 'LDG', 'Neu', 'pDC', 'mDC', 'CL_Mono', 'Int_Mono',
       'CD16p_Mono', 'NC_Mono', 'Naive_B', 'USM_B', 'DN_B', 'SM_B', 'NK',
       'Mem_CD8', 'EM_CD8', 'TEMRA_CD8', 'Naive_CD8', 'Fr_I_nTreg',
       'Naive_CD4', 'Fr_III_T', 'Fr_II_eTreg', 'CM_CD8', 'Th2', 'Tfh', 'Th17',
       'Mem_CD4', 'Th1']

# normalize up and down genes

def standard_scale(x):
    return (x-x.min())/x.max()

#sns.heatmap(u, cmap='inferno')


u = u.apply(standard_scale, axis=1)
d = d.apply(standard_scale, axis=1)

u = u.loc[:, ordered_cols]
d = d.loc[:, ordered_cols]


# collect all my cis-eqtls

lookup_cis(x)
lookup_tokyo(x)
eqtl_catalogue_LDblock_query_type_restricted_multitype(x)

# now all of these are in the Results object

gwas = ldlink.ldtrait(x)

In [17]:
tokyo = x.results.tokyo_eqtl().drop_duplicates()
tokyo = tokyo[['Gene_name','Variant_ID', 'Backward_slope', 'cell_type']]
tokyo

Unnamed: 0,Gene_name,Variant_ID,Backward_slope,cell_type
0,ARHGEF3,rs1354034,0.165671,Plasmablast


In [20]:
cat = x.results.eqtl_cat().drop_duplicates()
cat.T

Unnamed: 0,0,0.1
molecular_trait_id,ENSG00000163947,ENSG00000163947
gene_symbol,ARHGEF3,ARHGEF3
variant,chr3_56815721_T_C,chr3_56815721_T_C
chromosome,3,3
position,56815721,56815721
ref,T,T
alt,C,C
cs_id,ILMN_1781010_L1,ILMN_1781010_L1
cs_index,L1,L1
finemapped_region,chr3:56079329-58079329,chr3:56079329-58079329


In [10]:
gencis = x.results.eqtlgen_cis_df.drop_duplicates()

Unnamed: 0,Gene_name,Variant_ID,Backward_slope
0,ARHGEF3,rs1354034,0.165671


In [16]:
# the above cell does all the trans-eQTL processing and looks up all the cis-eQTL data

# now to summarize this into a single figure-like thing

# I need to show 4 tables (eqtlgen, eqtl_cat, immunexUT, GWAS catalog)

# this tables need to be filtered down, I need to learn the syntax for showing tables on matplotlib

# also the display syntax for showing seaborn clustermaps

In [28]:
# filter tables down

eqtl_cat = x.results.eqtl_cat()

if eqtl_cat.alt==x.EA:
    pass
elif eqtl_cat.alt==x.OA:
    eqtl_cat.z = -1 * eqtl_cat.z
    
eqtl_cat


AttributeError: 'DataFrame' object has no attribute 'ALT'

In [30]:
x.EA

'T'

In [31]:
x.results.eqtl_cat_df

Unnamed: 0,molecular_trait_id,gene_symbol,variant,chromosome,position,ref,alt,cs_id,cs_index,finemapped_region,...,cs_min_r2,cs_avg_r2,cs_size,posterior_mean,posterior_sd,cs_log10bf,int_chrom,study,study_type,cell_type
0,ENSG00000163947,ARHGEF3,chr3_56815721_T_C,3,56815721,T,C,ILMN_1781010_L1,L1,chr3:56079329-58079329,...,1.0,1.0,1,-0.876546369440084,0.0610111260567196,39.766475249425,3,CEDAR,ge,platelet


In [18]:
x.results.tokyo_eqtl_df

Unnamed: 0,Gene_id,Gene_name,CHR,TSS_position,Number_of_variants_cis,Variant_ID,OA,EA,Variant_CHR,Variant_position_start,Variant_position_end,Rank_of_association,Forward_nominal_P,Forward_slope,Backward_P,Backward_slope,int_chrom,cell_type
0,ENSG00000163947.11,ARHGEF3,chr3,57079329,5997,rs1354034,T,C,chr3,56815721,56815721,0,1.75008e-05,,1.75008e-05,0.165671,3,Plasmablast
0,ENSG00000163947.11,ARHGEF3,chr3,57079329,5997,rs1354034,T,C,chr3,56815721,56815721,0,1.75008e-05,,1.75008e-05,0.165671,3,Plasmablast


In [19]:
x.results.eqtlgen_cis()

Unnamed: 0,Pvalue,SNP,SNPChr,SNPPos,AssessedAllele,OtherAllele,Zscore,Gene,GeneSymbol,GeneChr,GenePos,NrCohorts,NrSamples,FDR,BonferroniP
0,3.3396e-07,rs1354034,3,56815721,T,C,5.1032,ENSG00000163947,ARHGEF3,3,56937401,37,31684,0.00102408142999,1.0
0,3.3396e-07,rs1354034,3,56815721,T,C,5.1032,ENSG00000163947,ARHGEF3,3,56937401,37,31684,0.00102408142999,1.0


def my_plotter(ax, data1, data2, param_dict):
    """
    A helper function to make a graph

    Parameters
    ----------
    ax : Axes
        The axes to draw to

    data1 : array
       The x data

    data2 : array
       The y data

    param_dict : dict
       Dictionary of keyword arguments to pass to ax.plot

    Returns
    -------
    out : list
        list of artists added
    """
    out = ax.plot(data1, data2, **param_dict)
    
    return out