In [12]:
## final versions of functions that are meant to take in a Variant object and extract rows from the different eQTL resources

import os 
import pandas as pd

from helpers.getpaths import *
from helpers.ldlink import *

class Variant():
    """A single variant plus the table of its LD proxies.

    The constructor stores the identifying fields and immediately calls
    ``set_LDblock``, which queries ``ldproxy`` with the rsID and keeps the
    reformatted result in ``self.LDblock``.

    Args:
        rsid: Identifier passed to ``ldproxy``.
        chrom: Chromosome of the variant.
        pos: Position of the variant.
        EA: First allele label (presumably the effect allele -- confirm).
        OA: Second allele label (presumably the other allele -- confirm).
    """

    # Columns kept in ``LDblock``, in output order.
    _LD_COLUMNS = ['RS_Number', 'chrom', 'hg38_pos', 'EA', 'OA', 'R2', 'MAF']

    def __init__(self, rsid:str, chrom:int, pos:int, EA:str, OA:str):
        self.rsid = rsid
        self.chrom = chrom
        self.pos = pos
        self.EA = EA
        self.OA = OA
        self.LDblock = None
        self.set_LDblock()

    def get_rsid(self):
        """Return the rsID given at construction."""
        return self.rsid

    def get_fullpos(self):
        """Return ``(chrom, pos)`` as a tuple."""
        return self.chrom, self.pos

    def get_pos(self):
        """Return the position given at construction."""
        return self.pos

    def get_chrom(self):
        """Return the chromosome given at construction."""
        return self.chrom

    def get_EA(self):
        """Return the EA allele given at construction."""
        return self.EA

    def get_OA(self):
        """Return the OA allele given at construction."""
        return self.OA

    def get_LDblock(self):
        """Return the LD proxy table built by ``set_LDblock``."""
        return self.LDblock

    def map_alleles(self, x, EA, OA):
        """Map ``EA``/``OA`` onto the alleles paired with them in ``x``.

        Args:
            x: Two comma-separated ``left=right`` pairs, e.g. ``"A=G,T=C"``.
            EA: Allele looked for on the left-hand side of the first pair.
            OA: Alternative allele looked for on the left-hand side of the
                first pair.

        Returns:
            A dict ``{EA: ..., OA: ...}`` holding the right-hand sides of the
            two pairs, keyed by whichever of ``EA``/``OA`` the first pair
            starts with. An empty dict is returned when the first pair starts
            with neither allele, or when ``x`` does not contain two
            ``left=right`` pairs.
        """
        pairs = [pair.split("=") for pair in str(x).split(",")]
        # Guard against values that are not two "left=right" pairs so one odd
        # row cannot abort the whole table with an IndexError.
        if len(pairs) < 2 or len(pairs[0]) < 2 or len(pairs[1]) < 2:
            return {}

        a1, a2 = pairs[0], pairs[1]
        d = {}
        if a1[0] == EA:
            d[EA] = a1[1]
            d[OA] = a2[1]
        elif a1[0] == OA:
            d[OA] = a1[1]
            d[EA] = a2[1]

        return d

    @staticmethod
    def _parse_chrom(label):
        """Turn a chromosome label such as ``"chr12"`` into ``12``.

        Non-numeric labels (e.g. ``"chrX"``) are returned as the bare string
        (``"X"``) instead of raising.
        """
        name = str(label)
        if name.lower().startswith("chr"):
            name = name[3:]
        # Use the whole remainder, not just the last character, so that
        # two-digit chromosomes are not truncated (chr12 -> 12, not 2).
        return int(name) if name.isdigit() else name

    def _format_ldproxy(self, df):
        """Reshape a raw ``ldproxy`` table into the ``LDblock`` layout.

        Reads the ``Correlated_Alleles`` and ``Coord`` columns, adds ``EA``,
        ``OA``, ``chrom`` and ``hg38_pos``, and returns only the columns in
        ``_LD_COLUMNS``. The input frame is not modified.
        """
        # Use this instance's alleles rather than module-level globals, and
        # parse each row once.
        allele_maps = [self.map_alleles(x, self.EA, self.OA)
                       for x in df.Correlated_Alleles]
        coords = [str(coord).split(":") for coord in df.Coord]

        out = df.assign(
            # Rows whose alleles could not be matched get a missing value.
            EA=[m.get(self.EA) for m in allele_maps],
            OA=[m.get(self.OA) for m in allele_maps],
            chrom=[self._parse_chrom(parts[0]) for parts in coords],
            # NOTE(review): the column name assumes ldproxy returns hg38
            # coordinates -- confirm against helpers.ldlink.
            hg38_pos=[int(parts[1]) for parts in coords],
        )
        return out[self._LD_COLUMNS]

    def set_LDblock(self):
        """Query ``ldproxy`` for this rsID and store the formatted table."""
        self.LDblock = self._format_ldproxy(ldproxy(self.rsid))

In [13]:
# Small smoke test: build a Variant for one example SNP
# (constructing it triggers the ldproxy lookup).

rsid, chrom, pos = "rs876037", 7, 50308692
EA, OA = "A", "T"

snp = Variant(rsid, chrom, pos, EA, OA)

In [48]:
def get_tokyocis(variant, paths=paths):
    """Collect rows for a variant's LD block from every file in the Tokyo eQTL directory.

    Each regular file in ``paths['eqtl_tokyo']`` is read as a tab-separated
    table and filtered to rows on the variant's chromosome whose
    ``Variant_position_start`` is one of the positions in the variant's LD
    block. A ``cell`` column is added, taken from the file name up to
    ``"_cond"``.

    Args:
        variant: Object exposing ``chrom`` and an ``LDblock`` frame with an
            ``hg38_pos`` column (e.g. a ``Variant``).
        paths: Mapping with an ``'eqtl_tokyo'`` entry naming the directory to
            scan.

    Returns:
        The filtered rows of all files concatenated into one DataFrame; an
        empty DataFrame when the directory contains no files.
    """
    eqtl_dir = paths['eqtl_tokyo']
    # Filter on the variant that was passed in, not on a notebook global.
    chrom_label = "chr" + str(variant.chrom)
    ld_positions = variant.LDblock.hg38_pos

    df_list = []

    # Sorted so the row order of the result does not depend on directory order.
    for fname in sorted(os.listdir(eqtl_dir)):
        # os.path.join works whether or not the directory has a trailing slash.
        file = os.path.join(eqtl_dir, fname)
        if not os.path.isfile(file):
            # Skip sub-directories such as .ipynb_checkpoints.
            continue

        cell = fname.split("_cond")[0]
        x = pd.read_csv(file, sep='\t')
        keep = (x.Variant_CHR == chrom_label) & x.Variant_position_start.isin(ld_positions)
        # assign() returns a new frame, so no column is set on a filtered slice.
        df_list.append(x[keep].assign(cell=cell))

    if not df_list:
        # pd.concat raises on an empty list.
        return pd.DataFrame()

    return pd.concat(df_list)

In [49]:
# Run the Tokyo eQTL lookup for the test variant built above.
x = get_tokyocis(snp)