In [1]:
import argparse
import requests
import numpy as np
import pandas as pd
import hail as hl
from hail.expr.functions import _sort_by
from hail.utils import hadoop_open, new_temp_file
import pyranges as pr
from ukbb_pan_ancestry import (
    get_distance_clumping_results_path,
    get_known_ukbb_loci_path,
    get_meta_analysis_results_path,
    get_munged_otg_v2d_path,
    get_munged_round2_path,
    get_pheno_manifest_path,
    get_ukb_pheno_efo_mapping_path,
    load_final_sumstats_mt,
    otg_release,
)

In [None]:
#--------------------------- 
# 09/20/2024
# expand 'region' to within 1Mb of lead variants
#---------------------------

In [2]:
def _to_pandas(ht):
            df = ht.to_pandas()
            # a temporary fix: cf. https://github.com/biocore-ntnu/pyranges/pull/264
            df = df.astype({k: "object" if v == "string[python]" else str(v).lower() for k, v in df.dtypes.items()})
            return df

In [None]:
# OTG lead variants: load the munged V2D table and build both the pandas frame
# (`df_v2d`) and the PyRanges object (`gr_v2d`) in this cell, so the prints
# below and the later join do not depend on leftover kernel state.
ht_v2d = hl.read_table("/n/holylfs05/LABS/kraft_lab/Lab/KCPS2/OTG/munged_otg_v2d_no_exclusion.ht")
ht_v2d = ht_v2d.annotate(
    # study_id is a set<str>; flatten it to one delimited string for pandas.
    study_id=hl.delimit(ht_v2d.study_id),
    # PyRanges requires columns named Chromosome / Start / End.
    Chromosome=ht_v2d.locus.contig,
    Start=ht_v2d.locus.position,
    End=ht_v2d.locus.position,
)
df_v2d = _to_pandas(ht_v2d)
print(df_v2d.head())
gr_v2d = pr.PyRanges(df_v2d)
print(gr_v2d.head())

      locus    trait_efo           study_id                    trait_efo_term  \
0  1:768253  EFO_0006312       GCST90026371     mitochondrial DNA measurement   
1  1:839538  EFO_0006865  NEALE2_20002_1202      urgency urinary incontinence   
2  1:858051  EFO_0004617       GCST90025945            cystatin c measurement   
3  1:891059  EFO_0004533       GCST90019494  alkaline phosphatase measurement   
4  1:894573  EFO_0002618         GCST005434              pancreatic carcinoma   

         trait_efo_category Chromosome   Start     End  
0         Other measurement          1  768253  768253  
1               Other trait          1  839538  839538  
2         Other measurement          1  858051  858051  
3  Liver enzyme measurement          1  891059  891059  
4                    Cancer          1  894573  894573  
+------------+-------------+----------------------------------+-------+
| locus      | trait_efo   | study_id                         | +5    |
| (object)   | (object)    

In [None]:
# Trait label used to pick the KCPS2 genomic-risk-loci file below.
trait = "ALCO_AMOUNT"
# KCPS2 lead variants: one row per risk locus; the file already carries the
# Chromosome / Start / End columns that PyRanges needs (see printed output).
df = pd.read_table(f"/n/holylfs05/LABS/kraft_lab/Lab/KCPS2/OTG/KCPS2_loci/GenomicRiskLoci_{trait}.txt", delimiter="\t")
print(df)

# Wrap the loci in a PyRanges object so they can be range-joined later.
gr = pr.PyRanges(df)
print(gr)

   idx    lead_locus                  region    trait_efo trait_efo_category  \
0    1    3:38052725     3:38052725-38052725  EFO_0007878  Other measurement   
1    2   4:100239319   4:100000136-100336448  EFO_0007878  Other measurement   
2    3    6:26207175     6:26180634-26326182  EFO_0007878  Other measurement   
3    4  12:108700164  12:108248685-108701635  EFO_0007878  Other measurement   
4    5  12:112241766  12:111414461-113117897  EFO_0007878  Other measurement   

   Chromosome      Start        End  
0           3   38052725   38052725  
1           4  100000136  100336448  
2           6   26180634   26326182  
3          12  108248685  108701635  
4          12  111414461  113117897  
+-----------+--------------+------------------------+-------------+-------+
|       idx | lead_locus   | region                 | trait_efo   | +4    |
|   (int64) | (object)     | (object)               | (object)    | ...   |
|-----------+--------------+------------------------+----------





In [None]:
# Spot check: select the KCPS2 loci on chromosome 4 for this coordinate slice.
# Requires `gr` from the KCPS2 loci cell to exist in the kernel.
gr['4', 100000136:100336445]

Unnamed: 0,idx,lead_locus,region,trait_efo,trait_efo_category,Chromosome,Start,End
1,2,4:100239319,4:100000136-100336448,EFO_0007878,Other measurement,4,100000136,100336448


In [12]:
# Left range-join of the KCPS2 loci (`gr`) against the OTG lead variants
# (`gr_v2d`); columns that clash get the "_otg" suffix. Both PyRanges objects
# must already exist in the kernel before this cell is run.
df_annotated = gr.join(gr_v2d, how="left", suffix="_otg").as_df()
# Coordinate columns are kept here for inspection (drop is disabled):
#df_annotated = df_annotated.drop(columns=["Chromosome", "Start", "End", "Start_otg", "End_otg"])
print(df_annotated)

      idx    lead_locus                  region    trait_efo  \
0       1    3:38052725     3:38052725-38052725  EFO_0007878   
1       2   4:100239319   4:100000136-100336448  EFO_0007878   
2       2   4:100239319   4:100000136-100336448  EFO_0007878   
3       2   4:100239319   4:100000136-100336448  EFO_0007878   
4       2   4:100239319   4:100000136-100336448  EFO_0007878   
...   ...           ...                     ...          ...   
1259    5  12:112241766  12:111414461-113117897  EFO_0007878   
1260    5  12:112241766  12:111414461-113117897  EFO_0007878   
1261    5  12:112241766  12:111414461-113117897  EFO_0007878   
1262    5  12:112241766  12:111414461-113117897  EFO_0007878   
1263    5  12:112241766  12:111414461-113117897  EFO_0007878   

     trait_efo_category Chromosome      Start        End         locus  \
0     Other measurement          3   38052725   38052725            -1   
1     Other measurement          4  100000136  100336448   4:100006645   
2     Oth

In [7]:
# Load the munged OTG V2D Hail table and preview its schema and first rows.
ht_v2d = hl.read_table("/n/holylfs05/LABS/kraft_lab/Lab/KCPS2/OTG/munged_otg_v2d_no_exclusion.ht")
ht_v2d.show(3)

locus,trait_efo,study_id,trait_efo_term,trait_efo_category
locus<GRCh37>,str,set<str>,str,str
1:768253,"""EFO_0006312""","{""GCST90026371""}","""mitochondrial DNA measurement""","""Other measurement"""
1:839538,"""EFO_0006865""","{""NEALE2_20002_1202""}","""urgency urinary incontinence""","""Other trait"""
1:858051,"""EFO_0004617""","{""GCST90025945""}","""cystatin c measurement""","""Other measurement"""


In [17]:
# OTG lead variants: reload the table and add the columns PyRanges needs.
ht_v2d = hl.read_table("/n/holylfs05/LABS/kraft_lab/Lab/KCPS2/OTG/munged_otg_v2d_no_exclusion.ht")
ht_v2d = ht_v2d.annotate(
    # study_id is a set<str> in the stored table; join it into a single string.
    study_id=hl.delimit(ht_v2d.study_id),
    # Each variant is its own range: Start and End are both the locus position.
    Chromosome=ht_v2d.locus.contig,
    Start=ht_v2d.locus.position,
    End=ht_v2d.locus.position,
)
ht_v2d.show(3)

locus,trait_efo,study_id,trait_efo_term,trait_efo_category,Chromosome,Start,End
locus<GRCh37>,str,str,str,str,str,int32,int32
1:768253,"""EFO_0006312""","""GCST90026371""","""mitochondrial DNA measurement""","""Other measurement""","""1""",768253,768253
1:839538,"""EFO_0006865""","""NEALE2_20002_1202""","""urgency urinary incontinence""","""Other trait""","""1""",839538,839538
1:858051,"""EFO_0004617""","""GCST90025945""","""cystatin c measurement""","""Other measurement""","""1""",858051,858051


In [15]:
# Collect the annotated OTG table into pandas (dtypes normalised by _to_pandas)
# and preview the first rows. Uses whatever `ht_v2d` currently holds.
df_v2d = _to_pandas(ht_v2d)
print(df_v2d.head())

[Stage 26:>                                                         (0 + 2) / 2]

      locus    trait_efo           study_id                    trait_efo_term  \
0  1:768253  EFO_0006312       GCST90026371     mitochondrial DNA measurement   
1  1:839538  EFO_0006865  NEALE2_20002_1202      urgency urinary incontinence   
2  1:858051  EFO_0004617       GCST90025945            cystatin c measurement   
3  1:891059  EFO_0004533       GCST90019494  alkaline phosphatase measurement   
4  1:894573  EFO_0002618         GCST005434              pancreatic carcinoma   

         trait_efo_category Chromosome   Start     End  
0         Other measurement          1  768253  768253  
1               Other trait          1  839538  839538  
2         Other measurement          1  858051  858051  
3  Liver enzyme measurement          1  891059  891059  
4                    Cancer          1  894573  894573  


In [20]:
#========== EAS biobanks as ref ===========#
# NOTE: this cell overwrites `ht_v2d` and `df_v2d` with the KoGES table, so any
# later cell using those names sees this data rather than the OTG V2D table.
ht_v2d = hl.import_table("/n/holylfs05/LABS/kraft_lab/Lab/KCPS2/OTG/GWAS_Catalog/KoGES_otg_form.txt", delimiter='\t', impute=True)

# Strip the "Chr" prefix from the locus string (e.g., "Chr2:27730940" -> "2:27730940").
# The pattern is anchored with "^", so only a leading "Chr" is removed.
ht_v2d = ht_v2d.annotate(locus_stripped = hl.str(ht_v2d.locus).replace("^Chr", ""))

# Parse the stripped string into a Hail locus on GRCh37, replacing the text `locus` field.
ht_v2d = ht_v2d.annotate(locus = hl.parse_locus(ht_v2d.locus_stripped, reference_genome='GRCh37'))

# Add the Chromosome / Start / End columns PyRanges needs; each variant is a
# single-position range. `study_id` is assigned to itself, i.e. left unchanged.
ht_v2d = ht_v2d.annotate(
    study_id = ht_v2d.study_id,
    Chromosome = ht_v2d.locus.contig,
    Start = ht_v2d.locus.position,
    End = ht_v2d.locus.position
)
df_v2d = _to_pandas(ht_v2d)
print(df_v2d.head())

         locus    trait_efo      study_id trait_efo_term trait_efo_category  \
0  15:89804417  EFO_0004339  GCST90255478    body height        body height   
1  15:90141591  EFO_0004339  GCST90255478    body height        body height   
2  15:90619081  EFO_0004339  GCST90255478    body height        body height   
3  15:99192483  EFO_0004339  GCST90255478    body height        body height   
4  15:99240993  EFO_0004339  GCST90255478    body height        body height   

  locus_stripped Chromosome     Start       End  
0    15:89804417         15  89804417  89804417  
1    15:90141591         15  90141591  90141591  
2    15:90619081         15  90619081  90619081  
3    15:99192483         15  99192483  99192483  
4    15:99240993         15  99240993  99240993  


In [3]:
def annotate_known_loci_1Mb_EAS(trait):
    """Flag KCPS2 risk loci (1Mb regions) as known or novel against EAS GWAS hits.

    Workflow:
      1. Import the KoGES/TWB lead-variant table, parse each locus on GRCh37
         and expose it as a PyRanges object (Start == End == variant position).
      2. Read ``GenomicRiskLoci_{trait}_1Mb.txt`` and left-join it against the
         reference ranges, so every KCPS2 locus is repeated once per
         overlapping reference variant (unmatched loci carry ``-1``).
      3. Round-trip the join through a temporary TSV into a Hail table and add
         ``is_known_efo`` / ``is_known_efo_category``: True when any joined
         row of the locus has the same value on both sides, False when the
         locus has a value but no equal reference value, missing otherwise.
      4. Print the known / novel / NA / NA-match locus counts per field and
         export the table to ``known_kcps2_loci_{trait}_1Mb_EAS.tsv``.

    NOTE: ``annotate_known_loci_500kb_EAS`` is a copy of this function that
    differs only in the ``_1Mb`` / ``_500kb`` file suffix; keep them in sync.

    Parameters
    ----------
    trait : str
        Trait label used in the input and output file names.

    Returns
    -------
    None
        Results are printed, shown (``ht.show(3)``) and written to disk.
    """
    #========== EAS biobanks as ref ===========#
    ht_v2d = hl.import_table("/n/holylfs05/LABS/kraft_lab/Lab/KCPS2/OTG/GWAS_Catalog/KoGES_TWB_otg_form.txt", delimiter='\t', impute=True)

    # Replace a leading '23' contig with 'X' before parsing the locus.
    ht_v2d = ht_v2d.annotate(
        locus=hl.if_else(
            ht_v2d.locus.startswith("23:"),
            hl.str(ht_v2d.locus).replace("23:", "X:"),
            ht_v2d.locus,
        )
    )

    # Parse the locus string into a proper Hail locus object.
    ht_v2d = ht_v2d.annotate(locus=hl.parse_locus(ht_v2d.locus, reference_genome='GRCh37'))

    # Columns required by PyRanges; each variant is a single-position range.
    ht_v2d = ht_v2d.annotate(
        Chromosome=ht_v2d.locus.contig,
        Start=ht_v2d.locus.position,
        End=ht_v2d.locus.position,
    )
    df_v2d = _to_pandas(ht_v2d)

    # KCPS2 lead variants with their 1Mb regions.
    df = pd.read_table(f"/n/holylfs05/LABS/kraft_lab/Lab/KCPS2/OTG/KCPS2_loci/GenomicRiskLoci_{trait}_1Mb.txt", delimiter="\t")

    gr = pr.PyRanges(df)
    gr_v2d = pr.PyRanges(df_v2d)

    df_annotated = gr.join(gr_v2d, how="left", suffix="_otg").as_df()
    df_annotated = df_annotated.drop(columns=["Chromosome", "Start", "End", "Start_otg", "End_otg"])

    # Write out and read back because hl.Table.from_pandas is extremely slow.
    tmpfile = new_temp_file()
    with hadoop_open(tmpfile, "wb") as f:
        df_annotated.to_csv(f, sep="\t", na_rep="NA", index=False)

    # Pin the text columns to str instead of relying on imputation. Unmatched
    # rows carry -1 in every reference column, so when no locus overlaps at
    # all, imputation would type those columns as int32 and the "-1"
    # comparison, parse_locus and split(",") below would raise type errors.
    text_columns = {"lead_locus", "region", "locus", "study_id"}
    forced_types = {
        column: hl.tstr
        for column in df_annotated.columns
        if column in text_columns or column.endswith("_otg")
    }
    ht = hl.import_table(tmpfile, impute=True, types=forced_types, min_partitions=1000)
    ht = ht.rename({"locus": "locus_otg", "study_id": "study_id_otg"})

    # The left join marks "no overlap" with -1; turn that into missing values.
    otg_fields = [name for name in ht.row if name.endswith("_otg")]
    ht = ht.annotate(
        **{
            name: hl.if_else(ht[name] == "-1", hl.missing(hl.tstr), ht[name])
            for name in otg_fields
        }
    )
    ht = ht.annotate(
        lead_locus=hl.parse_locus(ht.lead_locus),
        region=hl.parse_locus_interval(ht.region),
        locus=hl.parse_locus(ht.locus_otg),
        study_id_otg=ht.study_id_otg.split(","),
    )
    ht.show(3)

    for field in ["trait_efo", "trait_efo_category"]:
        labelled = df_annotated[~df_annotated[field].isna()]
        known_idx = set(labelled[labelled[field] == labelled[f"{field}_otg"]].idx)
        novel_idx = set(labelled.idx) - known_idx
        na_idx = set(df_annotated.idx) - set(labelled.idx)
        na_match = na_idx.intersection(
            set(df_annotated[df_annotated[field].isna() & ~df_annotated.locus.isna()].idx)
        )
        k = "efo" if field == "trait_efo" else "efo_category"
        # An empty Python set has no inferable element type, so fall back to a
        # typed empty Hail set in that case.
        known_set = hl.set(known_idx if len(known_idx) > 0 else hl.empty_set(ht.idx.dtype))
        novel_set = hl.set(novel_idx if len(novel_idx) > 0 else hl.empty_set(ht.idx.dtype))
        ht = ht.annotate(
            **{
                f"is_known_{k}": hl.case()
                .when(known_set.contains(ht.idx), True)
                .when(novel_set.contains(ht.idx), False)
                .default(hl.missing(hl.tbool))
            }
        )
        print(field)
        # known
        print(len(known_idx))
        # novel
        print(len(novel_idx))
        # NA
        print(len(na_idx))
        # NA match
        print(len(na_match))

    ht.export(f"/n/holylfs05/LABS/kraft_lab/Lab/KCPS2/OTG/KCPS2_otg/known_kcps2_loci_{trait}_1Mb_EAS.tsv")


In [4]:
def annotate_known_loci_500kb_EAS(trait):
    """Flag KCPS2 risk loci (500kb regions) as known or novel against EAS GWAS hits.

    Workflow:
      1. Import the KoGES/TWB lead-variant table, parse each locus on GRCh37
         and expose it as a PyRanges object (Start == End == variant position).
      2. Read ``GenomicRiskLoci_{trait}_500kb.txt`` and left-join it against
         the reference ranges, so every KCPS2 locus is repeated once per
         overlapping reference variant (unmatched loci carry ``-1``).
      3. Round-trip the join through a temporary TSV into a Hail table and add
         ``is_known_efo`` / ``is_known_efo_category``: True when any joined
         row of the locus has the same value on both sides, False when the
         locus has a value but no equal reference value, missing otherwise.
      4. Print the known / novel / NA / NA-match locus counts per field and
         export the table to ``known_kcps2_loci_{trait}_500kb_EAS.tsv``.

    NOTE: ``annotate_known_loci_1Mb_EAS`` is a copy of this function that
    differs only in the ``_500kb`` / ``_1Mb`` file suffix; keep them in sync.

    Parameters
    ----------
    trait : str
        Trait label used in the input and output file names.

    Returns
    -------
    None
        Results are printed, shown (``ht.show(3)``) and written to disk.
    """
    #========== EAS biobanks as ref ===========#
    ht_v2d = hl.import_table("/n/holylfs05/LABS/kraft_lab/Lab/KCPS2/OTG/GWAS_Catalog/KoGES_TWB_otg_form.txt", delimiter='\t', impute=True)

    # Replace a leading '23' contig with 'X' before parsing the locus.
    ht_v2d = ht_v2d.annotate(
        locus=hl.if_else(
            ht_v2d.locus.startswith("23:"),
            hl.str(ht_v2d.locus).replace("23:", "X:"),
            ht_v2d.locus,
        )
    )

    # Parse the locus string into a proper Hail locus object.
    ht_v2d = ht_v2d.annotate(locus=hl.parse_locus(ht_v2d.locus, reference_genome='GRCh37'))

    # Columns required by PyRanges; each variant is a single-position range.
    ht_v2d = ht_v2d.annotate(
        Chromosome=ht_v2d.locus.contig,
        Start=ht_v2d.locus.position,
        End=ht_v2d.locus.position,
    )
    df_v2d = _to_pandas(ht_v2d)

    # KCPS2 lead variants with their 500kb regions.
    df = pd.read_table(f"/n/holylfs05/LABS/kraft_lab/Lab/KCPS2/OTG/KCPS2_loci/GenomicRiskLoci_{trait}_500kb.txt", delimiter="\t")

    gr = pr.PyRanges(df)
    gr_v2d = pr.PyRanges(df_v2d)

    df_annotated = gr.join(gr_v2d, how="left", suffix="_otg").as_df()
    df_annotated = df_annotated.drop(columns=["Chromosome", "Start", "End", "Start_otg", "End_otg"])

    # Write out and read back because hl.Table.from_pandas is extremely slow.
    tmpfile = new_temp_file()
    with hadoop_open(tmpfile, "wb") as f:
        df_annotated.to_csv(f, sep="\t", na_rep="NA", index=False)

    # Pin the text columns to str instead of relying on imputation. Unmatched
    # rows carry -1 in every reference column, so when no locus overlaps at
    # all, imputation would type those columns as int32 and the "-1"
    # comparison, parse_locus and split(",") below would raise type errors.
    text_columns = {"lead_locus", "region", "locus", "study_id"}
    forced_types = {
        column: hl.tstr
        for column in df_annotated.columns
        if column in text_columns or column.endswith("_otg")
    }
    ht = hl.import_table(tmpfile, impute=True, types=forced_types, min_partitions=1000)
    ht = ht.rename({"locus": "locus_otg", "study_id": "study_id_otg"})

    # The left join marks "no overlap" with -1; turn that into missing values.
    otg_fields = [name for name in ht.row if name.endswith("_otg")]
    ht = ht.annotate(
        **{
            name: hl.if_else(ht[name] == "-1", hl.missing(hl.tstr), ht[name])
            for name in otg_fields
        }
    )
    ht = ht.annotate(
        lead_locus=hl.parse_locus(ht.lead_locus),
        region=hl.parse_locus_interval(ht.region),
        locus=hl.parse_locus(ht.locus_otg),
        study_id_otg=ht.study_id_otg.split(","),
    )
    ht.show(3)

    for field in ["trait_efo", "trait_efo_category"]:
        labelled = df_annotated[~df_annotated[field].isna()]
        known_idx = set(labelled[labelled[field] == labelled[f"{field}_otg"]].idx)
        novel_idx = set(labelled.idx) - known_idx
        na_idx = set(df_annotated.idx) - set(labelled.idx)
        na_match = na_idx.intersection(
            set(df_annotated[df_annotated[field].isna() & ~df_annotated.locus.isna()].idx)
        )
        k = "efo" if field == "trait_efo" else "efo_category"
        # An empty Python set has no inferable element type, so fall back to a
        # typed empty Hail set in that case.
        known_set = hl.set(known_idx if len(known_idx) > 0 else hl.empty_set(ht.idx.dtype))
        novel_set = hl.set(novel_idx if len(novel_idx) > 0 else hl.empty_set(ht.idx.dtype))
        ht = ht.annotate(
            **{
                f"is_known_{k}": hl.case()
                .when(known_set.contains(ht.idx), True)
                .when(novel_set.contains(ht.idx), False)
                .default(hl.missing(hl.tbool))
            }
        )
        print(field)
        # known
        print(len(known_idx))
        # novel
        print(len(novel_idx))
        # NA
        print(len(na_idx))
        # NA match
        print(len(na_match))

    ht.export(f"/n/holylfs05/LABS/kraft_lab/Lab/KCPS2/OTG/KCPS2_otg/known_kcps2_loci_{trait}_500kb_EAS.tsv")


In [None]:
## 1Mb

In [18]:
# Traits to annotate; each label must have a matching
# GenomicRiskLoci_{trait}_1Mb.txt input file.
trait_list = [
    "LDL", "TG", "HDL", "CHO", "FBS", "INSULIN",
    "GOT", "GPT", "GGT", "BIL", "ALB",
    "TSH", "CEA", "CREAT", "ADIPO", "URIC", "ALP",
    "HB", "HCT", "MCH", "MCHC", "MCV", "RBC", "RDW", "WBC", "PLT", "EOS",
    "SBP", "DBP", "WT", "HEIGHT", "BMI", "WAIST",
    "SMOKA_MOD", "ALCO_AMOUNT", "COFFA",
]
res = pd.DataFrame()
# Run the 1Mb EAS annotation trait by trait, echoing the label first so the
# printed counts below it can be attributed.
for trait in trait_list:
    print(trait)
    annotate_known_loci_1Mb_EAS(trait)

LDL







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:16505320,[1:15505320-1:17505320),"""EFO_0004611""","""Lipid or lipoprotein measurement""","""1:15539259""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15539259
1,1:16505320,[1:15505320-1:17505320),"""EFO_0004611""","""Lipid or lipoprotein measurement""","""1:15782677""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15782677
1,1:16505320,[1:15505320-1:17505320),"""EFO_0004611""","""Lipid or lipoprotein measurement""","""1:15816768""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15816768


trait_efo
62
46
0
0
trait_efo_category
0
108
0
0




TG







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:63145439,[1:62145439-1:64145439),"""EFO_0004530""","""Lipid or lipoprotein measurement""","""1:62853713""","""EFO_0004530""","[""GCST90255493""]","""triglyceride measurement""","""triglyceride measurement""",1:62853713
1,1:63145439,[1:62145439-1:64145439),"""EFO_0004530""","""Lipid or lipoprotein measurement""","""1:62853713""","""EFO_0004530""","[""GCST90255426""]","""triglyceride measurement""","""triglyceride measurement""",1:62853713
1,1:63145439,[1:62145439-1:64145439),"""EFO_0004530""","""Lipid or lipoprotein measurement""","""1:63043657""","""EFO_0004530""","[""GCST90255493""]","""triglyceride measurement""","""triglyceride measurement""",1:63043657


trait_efo
53
27
0
0
trait_efo_category
0
80
0
0




HDL







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:40064961,[1:39064961-1:41064961),"""EFO_0004612""","""Lipid or lipoprotein measurement""","""1:39457006""","""EFO_0004612""","[""GCST90255491""]","""high density lipoprotein cholesterol measurement""","""high density lipoprotein cholesterol measurement""",1:39457006
1,1:40064961,[1:39064961-1:41064961),"""EFO_0004612""","""Lipid or lipoprotein measurement""","""1:39976059""","""EFO_0000400""","[""GCST90255468""]","""diabetes mellitus""","""diabetes mellitus""",1:39976059
1,1:40064961,[1:39064961-1:41064961),"""EFO_0004612""","""Lipid or lipoprotein measurement""","""1:39976076""","""EFO_0004541""","[""GCST90278632""]","""HbA1c measurement""","""HbA1c measurement""",1:39976076


trait_efo
53
9
0
0
trait_efo_category
0
62
0
0




CHO







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:25829988,[1:24829988-1:26829988),"""EFO_0004574""","""Lipid or lipoprotein measurement""","""1:25051102""","""EFO_0004309""","[""GCST90278640""]","""platelet count""","""platelet count""",1:25051102
1,1:25829988,[1:24829988-1:26829988),"""EFO_0004574""","""Lipid or lipoprotein measurement""","""1:25607316""","""EFO_0004541""","[""GCST90255414""]","""HbA1c measurement""","""HbA1c measurement""",1:25607316
1,1:25829988,[1:24829988-1:26829988),"""EFO_0004574""","""Lipid or lipoprotein measurement""","""1:25611035""","""EFO_0004541""","[""GCST90255481""]","""HbA1c measurement""","""HbA1c measurement""",1:25611035


trait_efo
82
59
0
0
trait_efo_category
0
141
0
0




FBS







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:43453188,[1:42453188-1:44453188),"""EFO_0004468""","""Other measurement""","""1:43455283""","""EFO_0004465""","[""GCST90278628""]","""fasting blood glucose measurement""","""fasting blood glucose measurement""",1:43455283
1,1:43453188,[1:42453188-1:44453188),"""EFO_0004468""","""Other measurement""","""1:43805737""","""EFO_0004308""","[""GCST90278647""]","""leukocyte count""","""leukocyte count""",1:43805737
1,1:43453188,[1:42453188-1:44453188),"""EFO_0004468""","""Other measurement""","""1:43884714""","""EFO_0004309""","[""GCST90255498""]","""platelet count""","""platelet count""",1:43884714


trait_efo
47
40
0
0
trait_efo_category
0
87
0
0




INSULIN







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,2:27730940,[2:26730940-2:28730940),"""EFO_0004467""","""Other measurement""","""2:26909432""","""EFO_0006336""","[""GCST90278625""]","""diastolic blood pressure""","""diastolic blood pressure""",2:26909432
1,2:27730940,[2:26730940-2:28730940),"""EFO_0004467""","""Other measurement""","""2:26915624""","""EFO_0006335""","[""GCST90255406""]","""systolic blood pressure""","""systolic blood pressure""",2:26915624
1,2:27730940,[2:26730940-2:28730940),"""EFO_0004467""","""Other measurement""","""2:26915624""","""EFO_0006336""","[""GCST90255477""]","""diastolic blood pressure""","""diastolic blood pressure""",2:26915624


trait_efo
0
6
0
0
trait_efo_category
0
6
0
0




GOT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:16516585,[1:15516585-1:17516585),"""EFO_0004736""","""Liver enzyme measurement""","""1:15539259""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15539259
1,1:16516585,[1:15516585-1:17516585),"""EFO_0004736""","""Liver enzyme measurement""","""1:15782677""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15782677
1,1:16516585,[1:15516585-1:17516585),"""EFO_0004736""","""Liver enzyme measurement""","""1:15816768""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15816768


trait_efo
42
28
0
0
trait_efo_category
0
70
0
0




GPT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:31886498,[1:30886498-1:32886498),"""EFO_0004735""","""Liver enzyme measurement""","""1:31707354""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:31707354
1,1:31886498,[1:30886498-1:32886498),"""EFO_0004735""","""Liver enzyme measurement""","""1:31872750""","""EFO_0004736""","[""GCST90255486""]","""aspartate aminotransferase measurement""","""aspartate aminotransferase measurement""",1:31872750
1,1:31886498,[1:30886498-1:32886498),"""EFO_0004735""","""Liver enzyme measurement""","""1:31881152""","""EFO_0004735""","[""GCST90255488""]","""serum alanine aminotransferase measurement""","""serum alanine aminotransferase measurement""",1:31881152


trait_efo
40
28
0
0
trait_efo_category
0
68
0
0




GGT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:16505320,[1:15505320-1:17505320),"""EFO_0004532""","""Liver enzyme measurement""","""1:15539259""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15539259
1,1:16505320,[1:15505320-1:17505320),"""EFO_0004532""","""Liver enzyme measurement""","""1:15782677""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15782677
1,1:16505320,[1:15505320-1:17505320),"""EFO_0004532""","""Liver enzyme measurement""","""1:15816768""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15816768


trait_efo
77
27
0
0
trait_efo_category
0
104
0
0




BIL







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:156245606,[1:155245606-1:157245606),"""EFO_0004570""","""Other measurement""","""1:155485027""","""EFO_0004305""","[""GCST90255428""]","""erythrocyte count""","""erythrocyte count""",1:155485027
1,1:156245606,[1:155245606-1:157245606),"""EFO_0004570""","""Other measurement""","""1:155485027""","""EFO_0004348""","[""GCST90255430""]","""hematocrit""","""hematocrit""",1:155485027
1,1:156245606,[1:155245606-1:157245606),"""EFO_0004570""","""Other measurement""","""1:155485027""","""EFO_0004509""","[""GCST90255429""]","""hemoglobin measurement""","""hemoglobin measurement""",1:155485027


trait_efo
24
21
0
0
trait_efo_category
0
45
0
0




ALB







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:16528218,[1:15528218-1:17528218),"""EFO_0004535""","""Other measurement""","""1:15539259""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15539259
1,1:16528218,[1:15528218-1:17528218),"""EFO_0004535""","""Other measurement""","""1:15782677""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15782677
1,1:16528218,[1:15528218-1:17528218),"""EFO_0004535""","""Other measurement""","""1:15816768""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15816768


trait_efo
35
12
0
0
trait_efo_category
0
47
0
0




TSH







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:19841174,[1:18841174-1:20841174),"""EFO_0004748""","""Other measurement""","""1:19964699""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:19964699
2,1:22515032,[1:21515032-1:23515032),"""EFO_0004748""","""Other measurement""","""1:22115291""","""EFO_0000400""","[""GCST90255468""]","""diabetes mellitus""","""diabetes mellitus""",1:22115291
2,1:22515032,[1:21515032-1:23515032),"""EFO_0004748""","""Other measurement""","""1:22355978""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:22355978


trait_efo
0
100
0
0
trait_efo_category
0
100
0
0




CEA







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,2:109504287,[2:108504287-2:110504287),"""EFO_0005760""","""Other measurement""","""2:109941182""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",2:109941182
2,2:242709272,[2:241709272-2:243199373),"""EFO_0005760""","""Other measurement""","""2:241802259""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",2:241802259
2,2:242709272,[2:241709272-2:243199373),"""EFO_0005760""","""Other measurement""","""2:241805851""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",2:241805851


trait_efo
0
16
0
0
trait_efo_category
0
16
0
0




CREAT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:10707812,[1:9707812-1:11707812),"""EFO_0004518""","""Other measurement""","""1:10702266""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:10702266
1,1:10707812,[1:9707812-1:11707812),"""EFO_0004518""","""Other measurement""","""1:10709267""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:10709267
1,1:10707812,[1:9707812-1:11707812),"""EFO_0004518""","""Other measurement""","""1:10722657""","""EFO_0004741""","[""GCST90255483""]","""blood urea nitrogen measurement""","""blood urea nitrogen measurement""",1:10722657


trait_efo
64
19
0
0
trait_efo_category
0
83
0
0




ADIPO







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,3:52529266,[3:51529266-3:53529266),"""EFO_0004502""","""Other measurement""","""3:51675348""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",3:51675348
1,3:52529266,[3:51529266-3:53529266),"""EFO_0004502""","""Other measurement""","""3:52177700""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",3:52177700
1,3:52529266,[3:51529266-3:53529266),"""EFO_0004502""","""Other measurement""","""3:52348364""","""EFO_0004535""","[""GCST90255484""]","""serum albumin measurement""","""serum albumin measurement""",3:52348364


trait_efo
0
13
0
0
trait_efo_category
0
13
0
0




URIC







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:15914078,[1:14914078-1:16914078),"""EFO_0004761""","""Cardiovascular measurement""","""1:15539259""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15539259
1,1:15914078,[1:14914078-1:16914078),"""EFO_0004761""","""Cardiovascular measurement""","""1:15782677""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15782677
1,1:15914078,[1:14914078-1:16914078),"""EFO_0004761""","""Cardiovascular measurement""","""1:15816768""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15816768


trait_efo
38
62
0
0
trait_efo_category
0
100
0
0




ALP







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:21893344,[1:20893344-1:22893344),"""EFO_0004533""","""Liver enzyme measurement""","""1:22115291""","""EFO_0000400""","[""GCST90255468""]","""diabetes mellitus""","""diabetes mellitus""",1:22115291
1,1:21893344,[1:20893344-1:22893344),"""EFO_0004533""","""Liver enzyme measurement""","""1:22355978""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:22355978
1,1:21893344,[1:20893344-1:22893344),"""EFO_0004533""","""Liver enzyme measurement""","""1:22453324""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:22453324


trait_efo
0
33
0
0
trait_efo_category
0
33
0
0




HB







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:10796866,[1:9796866-1:11796866),"""EFO_0004509""","""Hematological measurement""","""1:10702266""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:10702266
1,1:10796866,[1:9796866-1:11796866),"""EFO_0004509""","""Hematological measurement""","""1:10709267""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:10709267
1,1:10796866,[1:9796866-1:11796866),"""EFO_0004509""","""Hematological measurement""","""1:10722657""","""EFO_0004741""","[""GCST90255483""]","""blood urea nitrogen measurement""","""blood urea nitrogen measurement""",1:10722657


trait_efo
73
69
0
0
trait_efo_category
0
142
0
0




HCT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:16379015,[1:15379015-1:17379015),"""EFO_0004348""","""Hematological measurement""","""1:15539259""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15539259
1,1:16379015,[1:15379015-1:17379015),"""EFO_0004348""","""Hematological measurement""","""1:15782677""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15782677
1,1:16379015,[1:15379015-1:17379015),"""EFO_0004348""","""Hematological measurement""","""1:15816768""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15816768


trait_efo
38
11
0
0
trait_efo_category
0
49
0
0




MCH







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:118261993,[1:117261993-1:119261993),"""EFO_0004527""","""Hematological measurement""","""1:118155145""","""EFO_0004309""","[""GCST90255498""]","""platelet count""","""platelet count""",1:118155145
1,1:118261993,[1:117261993-1:119261993),"""EFO_0004527""","""Hematological measurement""","""1:118155145""","""EFO_0004541""","[""GCST90278632""]","""HbA1c measurement""","""HbA1c measurement""",1:118155145
1,1:118261993,[1:117261993-1:119261993),"""EFO_0004527""","""Hematological measurement""","""1:118155620""","""EFO_0004309""","[""GCST90278640""]","""platelet count""","""platelet count""",1:118155620


trait_efo
0
77
0
0
trait_efo_category
0
77
0
0




MCHC







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:158580069,[1:157580069-1:159580069),"""EFO_0004528""","""Hematological measurement""","""1:158582552""","""EFO_0004541""","[""GCST90278632""]","""HbA1c measurement""","""HbA1c measurement""",1:158582552
1,1:158580069,[1:157580069-1:159580069),"""EFO_0004528""","""Hematological measurement""","""1:158585415""","""EFO_0004541""","[""GCST90255481""]","""HbA1c measurement""","""HbA1c measurement""",1:158585415
1,1:158580069,[1:157580069-1:159580069),"""EFO_0004528""","""Hematological measurement""","""1:158586966""","""EFO_0004570""","[""GCST90255487""]","""bilirubin measurement""","""bilirubin measurement""",1:158586966


trait_efo
0
30
0
0
trait_efo_category
0
30
0
0




MCV







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:26872832,[1:25872832-1:27872832),"""EFO_0004526""","""Hematological measurement""","""1:25890050""","""EFO_0004309""","[""GCST90255498""]","""platelet count""","""platelet count""",1:25890050
1,1:26872832,[1:25872832-1:27872832),"""EFO_0004526""","""Hematological measurement""","""1:26602550""","""EFO_0004309""","[""GCST90278640""]","""platelet count""","""platelet count""",1:26602550
1,1:26872832,[1:25872832-1:27872832),"""EFO_0004526""","""Hematological measurement""","""1:26643593""","""EFO_0004309""","[""GCST90255498""]","""platelet count""","""platelet count""",1:26643593


trait_efo
0
133
0
0
trait_efo_category
0
133
0
0




RBC







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:16370791,[1:15370791-1:17370791),"""EFO_0004305""","""Hematological measurement""","""1:15539259""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15539259
1,1:16370791,[1:15370791-1:17370791),"""EFO_0004305""","""Hematological measurement""","""1:15782677""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15782677
1,1:16370791,[1:15370791-1:17370791),"""EFO_0004305""","""Hematological measurement""","""1:15816768""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15816768


trait_efo
81
23
0
0
trait_efo_category
0
104
0
0




RDW







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:25811488,[1:24811488-1:26811488),"""EFO_0005192""","""Hematological measurement""","""1:25051102""","""EFO_0004309""","[""GCST90278640""]","""platelet count""","""platelet count""",1:25051102
1,1:25811488,[1:24811488-1:26811488),"""EFO_0005192""","""Hematological measurement""","""1:25607316""","""EFO_0004541""","[""GCST90255414""]","""HbA1c measurement""","""HbA1c measurement""",1:25607316
1,1:25811488,[1:24811488-1:26811488),"""EFO_0005192""","""Hematological measurement""","""1:25611035""","""EFO_0004541""","[""GCST90255481""]","""HbA1c measurement""","""HbA1c measurement""",1:25611035


trait_efo
0
68
0
0
trait_efo_category
0
68
0
0




WBC







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:36949623,[1:35949623-1:37949623),"""EFO_0004308""","""Hematological measurement""","""1:36880829""","""EFO_0004308""","[""GCST90255494""]","""leukocyte count""","""leukocyte count""",1:36880829
1,1:36949623,[1:35949623-1:37949623),"""EFO_0004308""","""Hematological measurement""","""1:36943916""","""EFO_0004308""","[""GCST90278647""]","""leukocyte count""","""leukocyte count""",1:36943916
1,1:36949623,[1:35949623-1:37949623),"""EFO_0004308""","""Hematological measurement""","""1:36969334""","""EFO_0004308""","[""GCST90255494""]","""leukocyte count""","""leukocyte count""",1:36969334


trait_efo
45
17
0
0
trait_efo_category
0
62
0
0




PLT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:1273278,[1:273278-1:2273278),"""EFO_0004309""","""Hematological measurement""","""1:1316674""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:1316674
2,1:1840038,[1:840038-1:2840038),"""EFO_0004309""","""Hematological measurement""","""1:1316674""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:1316674
2,1:1840038,[1:840038-1:2840038),"""EFO_0004309""","""Hematological measurement""","""1:2568425""","""EFO_0004340""","[""GCST90255480""]","""body mass index""","""body mass index""",1:2568425


trait_efo
188
79
0
0
trait_efo_category
0
267
0
0




EOS







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:155090013,[1:154090013-1:156090013),"""EFO_0004842""","""Hematological measurement""","""1:154123946""","""EFO_0004305""","[""GCST90255495""]","""erythrocyte count""","""erythrocyte count""",1:154123946
1,1:155090013,[1:154090013-1:156090013),"""EFO_0004842""","""Hematological measurement""","""1:154157229""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:154157229
1,1:155090013,[1:154090013-1:156090013),"""EFO_0004842""","""Hematological measurement""","""1:154237638""","""EFO_0004532""","[""GCST90255490""]","""serum gamma-glutamyl transferase measurement""","""serum gamma-glutamyl transferase measurement""",1:154237638


trait_efo
0
29
0
0
trait_efo_category
0
29
0
0




SBP







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
2,1:28793149,[1:27793149-1:29793149),"""EFO_0006335""","""Cardiovascular measurement""","""1:27971092""","""EFO_0004340""","[""GCST90255480""]","""body mass index""","""body mass index""",1:27971092
2,1:28793149,[1:27793149-1:29793149),"""EFO_0006335""","""Cardiovascular measurement""","""1:27972081""","""EFO_0004338""","[""GCST90255479""]","""body weight""","""body weight""",1:27972081
2,1:28793149,[1:27793149-1:29793149),"""EFO_0006335""","""Cardiovascular measurement""","""1:28298951""","""EFO_0004574""","[""GCST90255489""]","""total cholesterol measurement""","""total cholesterol measurement""",1:28298951


trait_efo
39
22
0
0
trait_efo_category
0
61
0
0




DBP







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:230852551,[1:229852551-1:231852551),"""EFO_0006336""","""Cardiovascular measurement""","""1:230138522""","""EFO_0004612""","[""GCST90255491""]","""high density lipoprotein cholesterol measurement""","""high density lipoprotein cholesterol measurement""",1:230138522
1,1:230852551,[1:229852551-1:231852551),"""EFO_0006336""","""Cardiovascular measurement""","""1:230294916""","""EFO_0004530""","[""GCST90255493""]","""triglyceride measurement""","""triglyceride measurement""",1:230294916
1,1:230852551,[1:229852551-1:231852551),"""EFO_0006336""","""Cardiovascular measurement""","""1:230294916""","""EFO_0004612""","[""GCST90255491""]","""high density lipoprotein cholesterol measurement""","""high density lipoprotein cholesterol measurement""",1:230294916


trait_efo
29
12
0
0
trait_efo_category
0
41
0
0




WT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:51433687,[1:50433687-1:52433687),"""EFO_0004338""","""Body measurement""","""1:50964446""","""EFO_0004339""","[""GCST90255411""]","""body height""","""body height""",1:50964446
1,1:51433687,[1:50433687-1:52433687),"""EFO_0004338""","""Body measurement""","""1:51103268""","""EFO_0000400""","[""GCST90255468""]","""diabetes mellitus""","""diabetes mellitus""",1:51103268
1,1:51433687,[1:50433687-1:52433687),"""EFO_0004338""","""Body measurement""","""1:51179515""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:51179515


trait_efo
71
17
0
0
trait_efo_category
0
88
0
0




HEIGHT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:1559703,[1:559703-1:2559703),"""EFO_0004339""","""Body measurement""","""1:1316674""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:1316674
2,1:3417131,[1:2417131-1:4417131),"""EFO_0004339""","""Body measurement""","""1:2568425""","""EFO_0004340""","[""GCST90255480""]","""body mass index""","""body mass index""",1:2568425
2,1:3417131,[1:2417131-1:4417131),"""EFO_0004339""","""Body measurement""","""1:2798205""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:2798205


trait_efo
375
53
0
0
trait_efo_category
0
428
0
0




BMI







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
2,1:74992546,[1:73992546-1:75992546),"""EFO_0004340""","""Body measurement""","""1:74968574""","""EFO_0004351""","[""GCST90278636""]","""resting heart rate""","""resting heart rate""",1:74968574
2,1:74992546,[1:73992546-1:75992546),"""EFO_0004340""","""Body measurement""","""1:75006027""","""EFO_0004340""","[""GCST90255480""]","""body mass index""","""body mass index""",1:75006027
3,1:75790373,[1:74790373-1:76790373),"""EFO_0004340""","""Body measurement""","""1:74968574""","""EFO_0004351""","[""GCST90278636""]","""resting heart rate""","""resting heart rate""",1:74968574


trait_efo
107
61
0
0
trait_efo_category
0
168
0
0




WAIST







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:177889025,[1:176889025-1:178889025),"""EFO_0004342""","""Body measurement""","""1:177810571""","""EFO_0004340""","[""GCST90255413""]","""body mass index""","""body mass index""",1:177810571
1,1:177889025,[1:176889025-1:178889025),"""EFO_0004342""","""Body measurement""","""1:177813039""","""EFO_0004340""","[""GCST90255480""]","""body mass index""","""body mass index""",1:177813039
1,1:177889025,[1:176889025-1:178889025),"""EFO_0004342""","""Body measurement""","""1:177835040""","""EFO_0004340""","[""GCST90255413""]","""body mass index""","""body mass index""",1:177835040


trait_efo
15
28
0
0
trait_efo_category
0
43
0
0




SMOKA_MOD







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,20:61993767,[20:60993767-20:62993767),"""EFO_0004318""","""Biological process""","""20:61273960""","""EFO_0004468""","[""GCST90255482""]","""glucose measurement""","""glucose measurement""",20:61273960
1,20:61993767,[20:60993767-20:62993767),"""EFO_0004318""","""Biological process""","""20:61992005""","""EFO_0004308""","[""GCST90255494""]","""leukocyte count""","""leukocyte count""",20:61992005
1,20:61993767,[20:60993767-20:62993767),"""EFO_0004318""","""Biological process""","""20:62372706""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",20:62372706


trait_efo
0
1
0
0
trait_efo_category
0
1
0
0




ALCO_AMOUNT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,3:38052725,[3:37052725-3:39052725),"""EFO_0007878""","""Other measurement""","""3:38052613""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",3:38052613
1,3:38052725,[3:37052725-3:39052725),"""EFO_0007878""","""Other measurement""","""3:38444489""","""EFO_0004518""","[""GCST90278624""]","""creatinine measurement""","""creatinine measurement""",3:38444489
1,3:38052725,[3:37052725-3:39052725),"""EFO_0007878""","""Other measurement""","""3:38484175""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",3:38484175


trait_efo
3
2
0
0
trait_efo_category
0
5
0
0




COFFA







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,4:89020503,[4:88020503-4:90020503),"""EFO_0006781""","""Other measurement""","""4:88022709""","""EFO_0004612""","[""GCST90255491""]","""high density lipoprotein cholesterol measurement""","""high density lipoprotein cholesterol measurement""",4:88022709
1,4:89020503,[4:88020503-4:90020503),"""EFO_0006781""","""Other measurement""","""4:88039000""","""EFO_0004570""","[""GCST90255487""]","""bilirubin measurement""","""bilirubin measurement""",4:88039000
1,4:89020503,[4:88020503-4:90020503),"""EFO_0006781""","""Other measurement""","""4:88040909""","""EFO_0004348""","[""GCST90255496""]","""hematocrit""","""hematocrit""",4:88040909


trait_efo
0
3
0
0
trait_efo_category
0
3
0
0




In [None]:
## 500kb: annotate known loci using regions within 500 kb of each lead variant

In [20]:
# Trait codes to process; each one is passed to annotate_known_loci_500kb_EAS
# (defined in another cell of this notebook) in the order listed here.
trait_list = [
    "LDL", "TG", "HDL", "CHO", "FBS", "INSULIN",
    "GOT", "GPT", "GGT", "BIL", "ALB",
    "TSH", "CEA", "CREAT", "ADIPO", "URIC", "ALP",
    "HB", "HCT", "MCH", "MCHC", "MCV",
    "RBC", "RDW", "WBC", "PLT", "EOS",
    "SBP", "DBP", "WT",
    "HEIGHT", "BMI", "WAIST",
    "SMOKA_MOD", "ALCO_AMOUNT", "COFFA",
]

# Empty frame bound at notebook level; nothing in this cell writes to it.
res = pd.DataFrame()

for i, trait in enumerate(trait_list):
    # Print the trait code first so the output that follows can be attributed to it.
    print(trait)
    annotate_known_loci_500kb_EAS(trait)

LDL







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:16505320,[1:16005320-1:17005320),"""EFO_0004611""","""Lipid or lipoprotein measurement""","""1:16184747""","""EFO_0003923""","[""GCST90278620""]","""bone density""","""bone density""",1:16184747
1,1:16505320,[1:16005320-1:17005320),"""EFO_0004611""","""Lipid or lipoprotein measurement""","""1:16184747""","""EFO_0003923""","[""GCST90278621""]","""bone density""","""bone density""",1:16184747
1,1:16505320,[1:16005320-1:17005320),"""EFO_0004611""","""Lipid or lipoprotein measurement""","""1:16364255""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:16364255


trait_efo
58
50
0
0
trait_efo_category
0
108
0
0




TG







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:63145439,[1:62645439-1:63645439),"""EFO_0004530""","""Lipid or lipoprotein measurement""","""1:62853713""","""EFO_0004530""","[""GCST90255493""]","""triglyceride measurement""","""triglyceride measurement""",1:62853713
1,1:63145439,[1:62645439-1:63645439),"""EFO_0004530""","""Lipid or lipoprotein measurement""","""1:62853713""","""EFO_0004530""","[""GCST90255426""]","""triglyceride measurement""","""triglyceride measurement""",1:62853713
1,1:63145439,[1:62645439-1:63645439),"""EFO_0004530""","""Lipid or lipoprotein measurement""","""1:63043657""","""EFO_0004530""","[""GCST90255493""]","""triglyceride measurement""","""triglyceride measurement""",1:63043657


trait_efo
52
28
0
0
trait_efo_category
0
80
0
0




HDL







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:40064961,[1:39564961-1:40564961),"""EFO_0004612""","""Lipid or lipoprotein measurement""","""1:39976059""","""EFO_0000400""","[""GCST90255468""]","""diabetes mellitus""","""diabetes mellitus""",1:39976059
1,1:40064961,[1:39564961-1:40564961),"""EFO_0004612""","""Lipid or lipoprotein measurement""","""1:39976076""","""EFO_0004541""","[""GCST90278632""]","""HbA1c measurement""","""HbA1c measurement""",1:39976076
1,1:40064961,[1:39564961-1:40564961),"""EFO_0004612""","""Lipid or lipoprotein measurement""","""1:39980885""","""EFO_0004530""","[""GCST90278645""]","""triglyceride measurement""","""triglyceride measurement""",1:39980885


trait_efo
52
10
0
0
trait_efo_category
0
62
0
0




CHO







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:25829988,[1:25329988-1:26329988),"""EFO_0004574""","""Lipid or lipoprotein measurement""","""1:25607316""","""EFO_0004541""","[""GCST90255414""]","""HbA1c measurement""","""HbA1c measurement""",1:25607316
1,1:25829988,[1:25329988-1:26329988),"""EFO_0004574""","""Lipid or lipoprotein measurement""","""1:25611035""","""EFO_0004541""","[""GCST90255481""]","""HbA1c measurement""","""HbA1c measurement""",1:25611035
1,1:25829988,[1:25329988-1:26329988),"""EFO_0004574""","""Lipid or lipoprotein measurement""","""1:25677844""","""EFO_0004541""","[""GCST90278632""]","""HbA1c measurement""","""HbA1c measurement""",1:25677844


trait_efo
78
63
0
0
trait_efo_category
0
141
0
0




FBS







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:43453188,[1:42953188-1:43953188),"""EFO_0004468""","""Other measurement""","""1:43455283""","""EFO_0004465""","[""GCST90278628""]","""fasting blood glucose measurement""","""fasting blood glucose measurement""",1:43455283
1,1:43453188,[1:42953188-1:43953188),"""EFO_0004468""","""Other measurement""","""1:43805737""","""EFO_0004308""","[""GCST90278647""]","""leukocyte count""","""leukocyte count""",1:43805737
1,1:43453188,[1:42953188-1:43953188),"""EFO_0004468""","""Other measurement""","""1:43884714""","""EFO_0004309""","[""GCST90255498""]","""platelet count""","""platelet count""",1:43884714


trait_efo
47
40
0
0
trait_efo_category
0
87
0
0




INSULIN







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,2:27730940,[2:27230940-2:28230940),"""EFO_0004467""","""Other measurement""","""2:27333445""","""EFO_0004530""","[""GCST90255426""]","""triglyceride measurement""","""triglyceride measurement""",2:27333445
1,2:27730940,[2:27230940-2:28230940),"""EFO_0004467""","""Other measurement""","""2:27341204""","""EFO_0004532""","[""GCST90255490""]","""serum gamma-glutamyl transferase measurement""","""serum gamma-glutamyl transferase measurement""",2:27341204
1,2:27730940,[2:27230940-2:28230940),"""EFO_0004467""","""Other measurement""","""2:27341204""","""EFO_0004530""","[""GCST90255493""]","""triglyceride measurement""","""triglyceride measurement""",2:27341204


trait_efo
0
6
0
0
trait_efo_category
0
6
0
0




GOT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:16516585,[1:16016585-1:17016585),"""EFO_0004736""","""Liver enzyme measurement""","""1:16184747""","""EFO_0003923""","[""GCST90278620""]","""bone density""","""bone density""",1:16184747
1,1:16516585,[1:16016585-1:17016585),"""EFO_0004736""","""Liver enzyme measurement""","""1:16184747""","""EFO_0003923""","[""GCST90278621""]","""bone density""","""bone density""",1:16184747
1,1:16516585,[1:16016585-1:17016585),"""EFO_0004736""","""Liver enzyme measurement""","""1:16364255""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:16364255


trait_efo
39
31
0
0
trait_efo_category
0
70
0
0




GPT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:31886498,[1:31386498-1:32386498),"""EFO_0004735""","""Liver enzyme measurement""","""1:31707354""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:31707354
1,1:31886498,[1:31386498-1:32386498),"""EFO_0004735""","""Liver enzyme measurement""","""1:31872750""","""EFO_0004736""","[""GCST90255486""]","""aspartate aminotransferase measurement""","""aspartate aminotransferase measurement""",1:31872750
1,1:31886498,[1:31386498-1:32386498),"""EFO_0004735""","""Liver enzyme measurement""","""1:31881152""","""EFO_0004735""","[""GCST90255488""]","""serum alanine aminotransferase measurement""","""serum alanine aminotransferase measurement""",1:31881152


trait_efo
38
30
0
0
trait_efo_category
0
68
0
0




GGT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:16505320,[1:16005320-1:17005320),"""EFO_0004532""","""Liver enzyme measurement""","""1:16184747""","""EFO_0003923""","[""GCST90278620""]","""bone density""","""bone density""",1:16184747
1,1:16505320,[1:16005320-1:17005320),"""EFO_0004532""","""Liver enzyme measurement""","""1:16184747""","""EFO_0003923""","[""GCST90278621""]","""bone density""","""bone density""",1:16184747
1,1:16505320,[1:16005320-1:17005320),"""EFO_0004532""","""Liver enzyme measurement""","""1:16364255""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:16364255


trait_efo
77
27
0
0
trait_efo_category
0
104
0
0




BIL







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:156245606,[1:155745606-1:156745606),"""EFO_0004570""","""Other measurement""","""1:156045662""","""EFO_0004741""","[""GCST90255483""]","""blood urea nitrogen measurement""","""blood urea nitrogen measurement""",1:156045662
1,1:156245606,[1:155745606-1:156745606),"""EFO_0004570""","""Other measurement""","""1:156045662""","""EFO_0000178""","[""GCST90255470""]","""gastric carcinoma""","""gastric carcinoma""",1:156045662
1,1:156245606,[1:155745606-1:156745606),"""EFO_0004570""","""Other measurement""","""1:156095359""","""EFO_0004509""","[""GCST90255497""]","""hemoglobin measurement""","""hemoglobin measurement""",1:156095359


trait_efo
24
21
0
0
trait_efo_category
0
45
0
0




ALB







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:16528218,[1:16028218-1:17028218),"""EFO_0004535""","""Other measurement""","""1:16184747""","""EFO_0003923""","[""GCST90278620""]","""bone density""","""bone density""",1:16184747
1,1:16528218,[1:16028218-1:17028218),"""EFO_0004535""","""Other measurement""","""1:16184747""","""EFO_0003923""","[""GCST90278621""]","""bone density""","""bone density""",1:16184747
1,1:16528218,[1:16028218-1:17028218),"""EFO_0004535""","""Other measurement""","""1:16364255""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:16364255


trait_efo
35
12
0
0
trait_efo_category
0
47
0
0




TSH







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:19841174,[1:19341174-1:20341174),"""EFO_0004748""","""Other measurement""","""1:19964699""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:19964699
2,1:22515032,[1:22015032-1:23015032),"""EFO_0004748""","""Other measurement""","""1:22115291""","""EFO_0000400""","[""GCST90255468""]","""diabetes mellitus""","""diabetes mellitus""",1:22115291
2,1:22515032,[1:22015032-1:23015032),"""EFO_0004748""","""Other measurement""","""1:22355978""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:22355978


trait_efo
0
100
0
0
trait_efo_category
0
100
0
0




CEA







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,2:109504287,[2:109004287-2:110004287),"""EFO_0005760""","""Other measurement""","""2:109941182""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",2:109941182
2,2:242709272,[2:242209272-2:243199373),"""EFO_0005760""","""Other measurement""","""2:242237902""","""EFO_0004535""","[""GCST90278616""]","""serum albumin measurement""","""serum albumin measurement""",2:242237902
2,2:242709272,[2:242209272-2:243199373),"""EFO_0005760""","""Other measurement""","""2:242288798""","""EFO_0004339""","[""GCST90255411""]","""body height""","""body height""",2:242288798


trait_efo
0
16
0
0
trait_efo_category
0
16
0
0




CREAT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:10707812,[1:10207812-1:11207812),"""EFO_0004518""","""Other measurement""","""1:10702266""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:10702266
1,1:10707812,[1:10207812-1:11207812),"""EFO_0004518""","""Other measurement""","""1:10709267""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:10709267
1,1:10707812,[1:10207812-1:11207812),"""EFO_0004518""","""Other measurement""","""1:10722657""","""EFO_0004741""","[""GCST90255483""]","""blood urea nitrogen measurement""","""blood urea nitrogen measurement""",1:10722657


trait_efo
63
20
0
0
trait_efo_category
0
83
0
0




ADIPO







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,3:52529266,[3:52029266-3:53029266),"""EFO_0004502""","""Other measurement""","""3:52177700""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",3:52177700
1,3:52529266,[3:52029266-3:53029266),"""EFO_0004502""","""Other measurement""","""3:52348364""","""EFO_0004535""","[""GCST90255484""]","""serum albumin measurement""","""serum albumin measurement""",3:52348364
1,3:52529266,[3:52029266-3:53029266),"""EFO_0004502""","""Other measurement""","""3:52584787""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",3:52584787


trait_efo
0
13
0
0
trait_efo_category
0
13
0
0




URIC







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:15914078,[1:15414078-1:16414078),"""EFO_0004761""","""Cardiovascular measurement""","""1:15539259""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15539259
1,1:15914078,[1:15414078-1:16414078),"""EFO_0004761""","""Cardiovascular measurement""","""1:15782677""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15782677
1,1:15914078,[1:15414078-1:16414078),"""EFO_0004761""","""Cardiovascular measurement""","""1:15816768""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:15816768


trait_efo
37
63
0
0
trait_efo_category
0
100
0
0




ALP







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:21893344,[1:21393344-1:22393344),"""EFO_0004533""","""Liver enzyme measurement""","""1:22115291""","""EFO_0000400""","[""GCST90255468""]","""diabetes mellitus""","""diabetes mellitus""",1:22115291
1,1:21893344,[1:21393344-1:22393344),"""EFO_0004533""","""Liver enzyme measurement""","""1:22355978""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:22355978
2,1:219305644,[1:218805644-1:219805644),"""EFO_0004533""","""Liver enzyme measurement""","""1:218879684""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:218879684


trait_efo
0
33
0
0
trait_efo_category
0
33
0
0




HB







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:10796866,[1:10296866-1:11296866),"""EFO_0004509""","""Hematological measurement""","""1:10702266""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:10702266
1,1:10796866,[1:10296866-1:11296866),"""EFO_0004509""","""Hematological measurement""","""1:10709267""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",1:10709267
1,1:10796866,[1:10296866-1:11296866),"""EFO_0004509""","""Hematological measurement""","""1:10722657""","""EFO_0004741""","[""GCST90255483""]","""blood urea nitrogen measurement""","""blood urea nitrogen measurement""",1:10722657


trait_efo
70
72
0
0
trait_efo_category
0
142
0
0




HCT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:16379015,[1:15879015-1:16879015),"""EFO_0004348""","""Hematological measurement""","""1:15881540""","""EFO_0004305""","[""GCST90278641""]","""erythrocyte count""","""erythrocyte count""",1:15881540
1,1:16379015,[1:15879015-1:16879015),"""EFO_0004348""","""Hematological measurement""","""1:15911349""","""EFO_0004518""","[""GCST90278624""]","""creatinine measurement""","""creatinine measurement""",1:15911349
1,1:16379015,[1:15879015-1:16879015),"""EFO_0004348""","""Hematological measurement""","""1:15923530""","""EFO_0004761""","[""GCST90278646""]","""uric acid measurement""","""uric acid measurement""",1:15923530


trait_efo
36
13
0
0
trait_efo_category
0
49
0
0




MCH







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:118261993,[1:117761993-1:118761993),"""EFO_0004527""","""Hematological measurement""","""1:118155145""","""EFO_0004309""","[""GCST90255498""]","""platelet count""","""platelet count""",1:118155145
1,1:118261993,[1:117761993-1:118761993),"""EFO_0004527""","""Hematological measurement""","""1:118155145""","""EFO_0004541""","[""GCST90278632""]","""HbA1c measurement""","""HbA1c measurement""",1:118155145
1,1:118261993,[1:117761993-1:118761993),"""EFO_0004527""","""Hematological measurement""","""1:118155620""","""EFO_0004309""","[""GCST90278640""]","""platelet count""","""platelet count""",1:118155620


trait_efo
0
77
0
0
trait_efo_category
0
77
0
0




MCHC







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:158580069,[1:158080069-1:159080069),"""EFO_0004528""","""Hematological measurement""","""1:158582552""","""EFO_0004541""","[""GCST90278632""]","""HbA1c measurement""","""HbA1c measurement""",1:158582552
1,1:158580069,[1:158080069-1:159080069),"""EFO_0004528""","""Hematological measurement""","""1:158585415""","""EFO_0004541""","[""GCST90255481""]","""HbA1c measurement""","""HbA1c measurement""",1:158585415
1,1:158580069,[1:158080069-1:159080069),"""EFO_0004528""","""Hematological measurement""","""1:158586966""","""EFO_0004570""","[""GCST90255487""]","""bilirubin measurement""","""bilirubin measurement""",1:158586966


trait_efo
0
30
0
0
trait_efo_category
0
30
0
0




MCV







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:26872832,[1:26372832-1:27372832),"""EFO_0004526""","""Hematological measurement""","""1:26602550""","""EFO_0004309""","[""GCST90278640""]","""platelet count""","""platelet count""",1:26602550
1,1:26872832,[1:26372832-1:27372832),"""EFO_0004526""","""Hematological measurement""","""1:26643593""","""EFO_0004309""","[""GCST90255498""]","""platelet count""","""platelet count""",1:26643593
1,1:26872832,[1:26372832-1:27372832),"""EFO_0004526""","""Hematological measurement""","""1:26796922""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:26796922


trait_efo
0
133
0
0
trait_efo_category
0
133
0
0




RBC







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:16370791,[1:15870791-1:16870791),"""EFO_0004305""","""Hematological measurement""","""1:15881540""","""EFO_0004305""","[""GCST90278641""]","""erythrocyte count""","""erythrocyte count""",1:15881540
1,1:16370791,[1:15870791-1:16870791),"""EFO_0004305""","""Hematological measurement""","""1:15911349""","""EFO_0004518""","[""GCST90278624""]","""creatinine measurement""","""creatinine measurement""",1:15911349
1,1:16370791,[1:15870791-1:16870791),"""EFO_0004305""","""Hematological measurement""","""1:15923530""","""EFO_0004761""","[""GCST90278646""]","""uric acid measurement""","""uric acid measurement""",1:15923530


trait_efo
77
27
0
0
trait_efo_category
0
104
0
0




RDW







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:25811488,[1:25311488-1:26311488),"""EFO_0005192""","""Hematological measurement""","""1:25607316""","""EFO_0004541""","[""GCST90255414""]","""HbA1c measurement""","""HbA1c measurement""",1:25607316
1,1:25811488,[1:25311488-1:26311488),"""EFO_0005192""","""Hematological measurement""","""1:25611035""","""EFO_0004541""","[""GCST90255481""]","""HbA1c measurement""","""HbA1c measurement""",1:25611035
1,1:25811488,[1:25311488-1:26311488),"""EFO_0005192""","""Hematological measurement""","""1:25677844""","""EFO_0004541""","[""GCST90278632""]","""HbA1c measurement""","""HbA1c measurement""",1:25677844


trait_efo
0
68
0
0
trait_efo_category
0
68
0
0




WBC







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:36949623,[1:36449623-1:37449623),"""EFO_0004308""","""Hematological measurement""","""1:36880829""","""EFO_0004308""","[""GCST90255494""]","""leukocyte count""","""leukocyte count""",1:36880829
1,1:36949623,[1:36449623-1:37449623),"""EFO_0004308""","""Hematological measurement""","""1:36943916""","""EFO_0004308""","[""GCST90278647""]","""leukocyte count""","""leukocyte count""",1:36943916
1,1:36949623,[1:36449623-1:37449623),"""EFO_0004308""","""Hematological measurement""","""1:36969334""","""EFO_0004308""","[""GCST90255494""]","""leukocyte count""","""leukocyte count""",1:36969334


trait_efo
44
18
0
0
trait_efo_category
0
62
0
0




PLT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:1273278,[1:773278-1:1773278),"""EFO_0004309""","""Hematological measurement""","""1:1316674""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:1316674
4,1:8893500,[1:8393500-1:9393500),"""EFO_0004309""","""Hematological measurement""","""1:8776719""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:8776719
4,1:8893500,[1:8393500-1:9393500),"""EFO_0004309""","""Hematological measurement""","""1:9266901""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:9266901


trait_efo
172
95
0
0
trait_efo_category
0
267
0
0




EOS







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:155090013,[1:154590013-1:155590013),"""EFO_0004842""","""Hematological measurement""","""1:155072014""","""EFO_0004509""","[""GCST90255497""]","""hemoglobin measurement""","""hemoglobin measurement""",1:155072014
1,1:155090013,[1:154590013-1:155590013),"""EFO_0004842""","""Hematological measurement""","""1:155072014""","""EFO_0004348""","[""GCST90255496""]","""hematocrit""","""hematocrit""",1:155072014
1,1:155090013,[1:154590013-1:155590013),"""EFO_0004842""","""Hematological measurement""","""1:155105388""","""EFO_0004532""","[""GCST90278630""]","""serum gamma-glutamyl transferase measurement""","""serum gamma-glutamyl transferase measurement""",1:155105388


trait_efo
0
29
0
0
trait_efo_category
0
29
0
0




SBP







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
2,1:28793149,[1:28293149-1:29293149),"""EFO_0006335""","""Cardiovascular measurement""","""1:28298951""","""EFO_0004574""","[""GCST90255489""]","""total cholesterol measurement""","""total cholesterol measurement""",1:28298951
2,1:28793149,[1:28293149-1:29293149),"""EFO_0006335""","""Cardiovascular measurement""","""1:28319484""","""EFO_0004612""","[""GCST90255424""]","""high density lipoprotein cholesterol measurement""","""high density lipoprotein cholesterol measurement""",1:28319484
2,1:28793149,[1:28293149-1:29293149),"""EFO_0006335""","""Cardiovascular measurement""","""1:28380087""","""EFO_0004612""","[""GCST90255491""]","""high density lipoprotein cholesterol measurement""","""high density lipoprotein cholesterol measurement""",1:28380087


trait_efo
37
24
0
0
trait_efo_category
0
61
0
0




DBP







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
2,1:236317593,[1:235817593-1:236817593),"""EFO_0006336""","""Cardiovascular measurement""","""1:236326005""","""EFO_0006336""","[""GCST90278625""]","""diastolic blood pressure""","""diastolic blood pressure""",1:236326005
1,1:230852551,[1:230352551-1:231352551),"""EFO_0006336""","""Cardiovascular measurement""",,,,"""-1""",,
3,2:165008513,[2:164508513-2:165508513),"""EFO_0006336""","""Cardiovascular measurement""","""2:164780696""","""EFO_0006335""","[""GCST90255476""]","""systolic blood pressure""","""systolic blood pressure""",2:164780696


trait_efo
29
12
0
0
trait_efo_category
0
41
0
0




WT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:51433687,[1:50933687-1:51933687),"""EFO_0004338""","""Body measurement""","""1:50964446""","""EFO_0004339""","[""GCST90255411""]","""body height""","""body height""",1:50964446
1,1:51433687,[1:50933687-1:51933687),"""EFO_0004338""","""Body measurement""","""1:51103268""","""EFO_0000400""","[""GCST90255468""]","""diabetes mellitus""","""diabetes mellitus""",1:51103268
1,1:51433687,[1:50933687-1:51933687),"""EFO_0004338""","""Body measurement""","""1:51179515""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:51179515


trait_efo
68
20
0
0
trait_efo_category
0
88
0
0




HEIGHT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:1559703,[1:1059703-1:2059703),"""EFO_0004339""","""Body measurement""","""1:1316674""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:1316674
2,1:3417131,[1:2917131-1:3917131),"""EFO_0004339""","""Body measurement""","""1:3051621""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",1:3051621
2,1:3417131,[1:2917131-1:3917131),"""EFO_0004339""","""Body measurement""","""1:3099194""","""EFO_0004305""","[""GCST90255495""]","""erythrocyte count""","""erythrocyte count""",1:3099194


trait_efo
358
70
0
0
trait_efo_category
0
428
0
0




BMI







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
2,1:74992546,[1:74492546-1:75492546),"""EFO_0004340""","""Body measurement""","""1:74968574""","""EFO_0004351""","[""GCST90278636""]","""resting heart rate""","""resting heart rate""",1:74968574
2,1:74992546,[1:74492546-1:75492546),"""EFO_0004340""","""Body measurement""","""1:75006027""","""EFO_0004340""","[""GCST90255480""]","""body mass index""","""body mass index""",1:75006027
4,1:107886546,[1:107386546-1:108386546),"""EFO_0004340""","""Body measurement""","""1:107907401""","""EFO_0004340""","[""GCST90255480""]","""body mass index""","""body mass index""",1:107907401


trait_efo
96
72
0
0
trait_efo_category
0
168
0
0




WAIST







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:177889025,[1:177389025-1:178389025),"""EFO_0004342""","""Body measurement""","""1:177810571""","""EFO_0004340""","[""GCST90255413""]","""body mass index""","""body mass index""",1:177810571
1,1:177889025,[1:177389025-1:178389025),"""EFO_0004342""","""Body measurement""","""1:177813039""","""EFO_0004340""","[""GCST90255480""]","""body mass index""","""body mass index""",1:177813039
1,1:177889025,[1:177389025-1:178389025),"""EFO_0004342""","""Body measurement""","""1:177835040""","""EFO_0004340""","[""GCST90255413""]","""body mass index""","""body mass index""",1:177835040


trait_efo
15
28
0
0
trait_efo_category
0
43
0
0




SMOKA_MOD







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,20:61993767,[20:61493767-20:62493767),"""EFO_0004318""","""Biological process""","""20:61992005""","""EFO_0004308""","[""GCST90255494""]","""leukocyte count""","""leukocyte count""",20:61992005
1,20:61993767,[20:61493767-20:62493767),"""EFO_0004318""","""Biological process""","""20:62372706""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",20:62372706
1,20:61993767,[20:61493767-20:62493767),"""EFO_0004318""","""Biological process""","""20:62435465""","""EFO_0004348""","[""GCST90255496""]","""hematocrit""","""hematocrit""",20:62435465


trait_efo
0
1
0
0
trait_efo_category
0
1
0
0




ALCO_AMOUNT







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,3:38052725,[3:37552725-3:38552725),"""EFO_0007878""","""Other measurement""","""3:38052613""","""EFO_0004339""","[""GCST90255478""]","""body height""","""body height""",3:38052613
1,3:38052725,[3:37552725-3:38552725),"""EFO_0007878""","""Other measurement""","""3:38444489""","""EFO_0004518""","[""GCST90278624""]","""creatinine measurement""","""creatinine measurement""",3:38444489
1,3:38052725,[3:37552725-3:38552725),"""EFO_0007878""","""Other measurement""","""3:38484175""","""EFO_0004518""","[""GCST90255485""]","""creatinine measurement""","""creatinine measurement""",3:38484175


trait_efo
3
2
0
0
trait_efo_category
0
5
0
0




COFFA







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,4:89020503,[4:88520503-4:89520503),"""EFO_0006781""","""Other measurement""","""4:88692427""","""EFO_0004735""","[""GCST90255488""]","""serum alanine aminotransferase measurement""","""serum alanine aminotransferase measurement""",4:88692427
1,4:89020503,[4:88520503-4:89520503),"""EFO_0006781""","""Other measurement""","""4:88768860""","""EFO_0003923""","[""GCST90278620""]","""bone density""","""bone density""",4:88768860
1,4:89020503,[4:88520503-4:89520503),"""EFO_0006781""","""Other measurement""","""4:88768860""","""EFO_0003923""","[""GCST90278621""]","""bone density""","""bone density""",4:88768860


trait_efo
0
3
0
0
trait_efo_category
0
3
0
0




In [None]:
# 10/05/2024
# Re-run of the FBS annotation after correcting the EFO ID used for FBS

In [5]:
# Traits to (re-)annotate in this run; the loop below calls the 500kb
# annotation helper once per trait.
trait_list = ["FBS"]
# NOTE(review): `res` is not used inside this cell; it is kept because other
# cells may read it -- confirm before removing.
res = pd.DataFrame()
# Iterate over the traits directly instead of indexing by position.
for trait in trait_list:
    print(trait)
    annotate_known_loci_500kb_EAS(trait)

Initializing Hail with default parameters...


FBS
24/10/06 00:25:17 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Running on Apache Spark version 3.3.4
SparkUI available at http://holy7c04105.rc.fas.harvard.edu:4042
Welcome to
     __  __     <>__
    / /_/ /__  __/ /
   / __  / _ `/ / /
  /_/ /_/\_,_/_/_/   version 0.2.130-bea04d9c79b5
LOGGING: writing to /n/holylfs05/LABS/kraft_lab/Lab/KCPS2/OTG/hail-20241006-0025-0.2.130-bea04d9c79b5.log
2024-10-06 00:25:25.952 Hail: INFO: Reading table to impute column types
2024-10-06 00:25:27.198 Hail: INFO: Finished type imputation
  Loading field 'locus' as type str (imputed)
  Loading field 'trait_efo' as type str (imputed)
  Loading field 'study_id' as type str (imputed)
  Loading field 'trait_efo_term' as type str (imputed)
  Loading field 'trait_efo_category' as type str (imputed)




2024-10-06 00:25:30.841 Hail: INFO: Reading table to impute column types
2024-10-06 00:25:41.574 Hail: INFO: Finished type imputation=> (973 +

idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:43453188,[1:42953188-1:43953188),"""EFO_0004465""","""Other measurement""","""1:43455283""","""EFO_0004465""","[""GCST90278628""]","""fasting blood glucose measurement""","""fasting blood glucose measurement""",1:43455283
1,1:43453188,[1:42953188-1:43953188),"""EFO_0004465""","""Other measurement""","""1:43805737""","""EFO_0004308""","[""GCST90278647""]","""leukocyte count""","""leukocyte count""",1:43805737
1,1:43453188,[1:42953188-1:43953188),"""EFO_0004465""","""Other measurement""","""1:43884714""","""EFO_0004309""","[""GCST90255498""]","""platelet count""","""platelet count""",1:43884714


trait_efo
44
43
0
0
trait_efo_category
0
87
0
0


2024-10-06 00:25:53.038 Hail: INFO: merging 1001 files totalling 232.1K... 1000]
2024-10-06 00:25:53.179 Hail: INFO: while writing:
    /n/holylfs05/LABS/kraft_lab/Lab/KCPS2/OTG/KCPS2_otg/known_kcps2_loci_FBS_500kb_EAS.tsv
  merge time: 138.290ms
2024-10-06 00:26:36.188 Hail: INFO: Reading table to impute column types
2024-10-06 00:26:36.650 Hail: INFO: Finished type imputation
  Loading field 'locus' as type str (imputed)
  Loading field 'trait_efo' as type str (imputed)
  Loading field 'study_id' as type str (imputed)
  Loading field 'trait_efo_term' as type str (imputed)
  Loading field 'trait_efo_category' as type str (imputed)
2024-10-06 00:26:38.823 Hail: INFO: Reading table to impute column types
2024-10-06 00:26:46.464 Hail: INFO: Finished type imputation
  Loading field 'idx' as type int32 (imputed)
  Loading field 'lead_locus' as type str (imputed)
  Loading field 'region' as type str (imputed)
  Loading field 'trait_efo' as type str (imputed)
  Loading field 'trait_efo_categ

In [7]:
# Traits to (re-)annotate in this run; the loop below calls the 1Mb
# annotation helper once per trait.
trait_list = ["FBS"]
# NOTE(review): `res` is not used inside this cell; it is kept because other
# cells may read it -- confirm before removing.
res = pd.DataFrame()
# Iterate over the traits directly instead of indexing by position.
for trait in trait_list:
    print(trait)
    annotate_known_loci_1Mb_EAS(trait)

FBS







idx,lead_locus,region,trait_efo,trait_efo_category,locus_otg,trait_efo_otg,study_id_otg,trait_efo_term,trait_efo_category_otg,locus
int32,locus<GRCh37>,interval<locus<GRCh37>>,str,str,str,str,array<str>,str,str,locus<GRCh37>
1,1:43453188,[1:42453188-1:44453188),"""EFO_0004465""","""Other measurement""","""1:43455283""","""EFO_0004465""","[""GCST90278628""]","""fasting blood glucose measurement""","""fasting blood glucose measurement""",1:43455283
1,1:43453188,[1:42453188-1:44453188),"""EFO_0004465""","""Other measurement""","""1:43805737""","""EFO_0004308""","[""GCST90278647""]","""leukocyte count""","""leukocyte count""",1:43805737
1,1:43453188,[1:42453188-1:44453188),"""EFO_0004465""","""Other measurement""","""1:43884714""","""EFO_0004309""","[""GCST90255498""]","""platelet count""","""platelet count""",1:43884714


trait_efo
44
43
0
0
trait_efo_category
0
87
0
0




In [None]:
# Temporary scratch cells: distance-based clumping

In [None]:
def agg_distance_clumping(
    locus_expr, alleles_expr, pvalue_expr, radius=500000, nlog_p=False, merge_overlapping_loci=True
):
    """Build a Hail aggregation that greedily clumps variants into loci by distance.

    One struct per aggregated row is collected (``lead_locus``, ``lead_alleles``,
    ``region``, ``lead_pvalue``), where ``region`` is
    ``locus_expr.window(radius, radius)``. The structs are ordered by
    ``pvalue_expr`` and then consumed by a recursive Hail loop: the first
    remaining struct becomes a new lead, and every remaining struct whose
    ``lead_locus`` falls inside that lead's region is dropped. The loop ends
    when no structs remain.

    Args:
        locus_expr: Locus expression; stored as ``lead_locus`` and used to
            derive ``region`` via ``.window(radius, radius)``.
        alleles_expr: Expression stored as ``lead_alleles``; it is carried
            along but not inspected by the clumping logic.
        pvalue_expr: Expression stored as ``lead_pvalue`` and used as the
            sort key.
        radius: Passed as both arguments of ``locus_expr.window``.
        nlog_p: Forwarded as ``reverse`` to the sort, i.e. when true the
            structs are ordered from largest to smallest key (presumably
            because the key is then a -log p-value -- confirm with callers).
        merge_overlapping_loci: Python-level switch evaluated while the
            expression is built. When true, a new lead whose region overlaps
            an already accepted lead's region is not appended; instead the
            region of the first overlapping accepted lead is widened to cover
            both. When false, every new lead is appended.

    Returns:
        The result of the loop: an array expression of the lead structs
        (element type taken from the collected structs).
    """

    def _distance_clumping(f, lead_st_arr, remaining_st_arr):
        """Single loop step; ``f`` is the recursion handle supplied by ``hl.experimental.loop``."""
        # The head of the (sorted) remaining array is the next lead.
        # NOTE(review): indexed unconditionally -- assumes at least one element was
        # collected; confirm behaviour for an empty aggregation.
        new_lead_st = remaining_st_arr[0]
        new_region = new_lead_st.region
        # Drop everything inside the new lead's region (this also removes the new lead itself).
        remaining_st_arr = remaining_st_arr.filter(lambda x: ~new_region.contains(x.lead_locus))

        # check whether a new region overlaps with previous ones
        # (`find` yields the first matching accepted lead, or missing if there is none;
        # with merging disabled a typed missing value forces the append branch below)
        overlapping_st = (
            lead_st_arr.find(lambda x: new_region.overlaps(x.region))
            if merge_overlapping_loci
            else hl.missing(new_lead_st.dtype)
        )
        lead_st_arr = hl.if_else(
            hl.is_defined(overlapping_st),
            # Overlap: keep the existing lead(s) equal to `overlapping_st` but widen their
            # region to span from the smaller start to the larger end of the two regions.
            # The new lead itself is not added to the array in this branch.
            lead_st_arr.map(
                lambda x: hl.if_else(
                    x == overlapping_st,
                    x.annotate(
                        region=hl.rbind(
                            overlapping_st.region,
                            # NOTE(review): hl.interval is called without includes_start /
                            # includes_end, so its defaults apply -- confirm they match the
                            # endpoint inclusion of the regions produced by `window`.
                            lambda old_region: hl.interval(
                                hl.if_else(
                                    old_region.start < new_region.start,
                                    old_region.start,
                                    new_region.start,
                                ),
                                hl.if_else(old_region.end > new_region.end, old_region.end, new_region.end),
                            ),
                        )
                    ),
                    x,
                )
            ),
            # No overlap (or merging disabled): the new lead starts its own locus.
            lead_st_arr.append(new_lead_st),
        )
        converged = hl.len(remaining_st_arr) == 0

        # Recurse through `f` until nothing is left to assign.
        return hl.if_else(converged, lead_st_arr, f(lead_st_arr, remaining_st_arr))

    # Modified from hl.expr.functions.sorted -- key is now another collection, not callable
    def _sorted(collection, key, reverse=False):
        """Return ``collection`` reordered according to the parallel ``key`` collection."""

        def comp(left, right):
            # Comparison predicate handed to `_sort_by`: a missing `left` never sorts before
            # `right`, a missing `right` always sorts after `left`; otherwise compare the
            # keys, with the operands swapped when `reverse` is set.
            return (
                hl.case()
                .when(hl.is_missing(left), False)
                .when(hl.is_missing(right), True)
                .when(reverse, hl._compare(right, left) < 0)
                .default(hl._compare(left, right) < 0)
            )

        # Pair each key with its element, sort the pairs on the key, then strip the key again.
        return _sort_by(hl.zip(key, collection), lambda l, r: comp(l[0], r[0])).map(lambda elt: elt[1])

    # lead locus / region pair is necessary to check overlapping regions during the loop
    x = hl.agg.collect(
        hl.struct(
            st=hl.struct(
                lead_locus=locus_expr,
                lead_alleles=alleles_expr,
                region=locus_expr.window(radius, radius),
                lead_pvalue=pvalue_expr,
            )
        )
    )
    sorted_st = _sorted(x.st, x.st.lead_pvalue, reverse=nlog_p)
    # Element type of the collected structs; used to type the loop result, its empty
    # starting array, and the argument of the defined function below.
    t_struct = sorted_st[0].dtype

    # Wrap the loop in a Hail function taking the sorted array; the loop starts with an
    # empty array of leads and the full sorted array as the remaining candidates.
    run_distance_clumping = hl.experimental.define_function(
        lambda sorted_st: hl.experimental.loop(
            _distance_clumping, hl.tarray(t_struct), hl.empty_array(t_struct), sorted_st
        ),
        hl.tarray(t_struct),
    )
    return run_distance_clumping(sorted_st)


In [None]:
# Group the table by the column-key fields of `mt` and run the distance
# clumping aggregator once per group.
# NOTE(review): `mt` and `args` are not defined in the visible cells --
# confirm they exist in the kernel before running this cell. The cell also
# rebinds `ht`, so it is not safe to run twice on the same kernel state.
ht = ht.group_by(*mt.col_key).aggregate(
    distance_clumps=agg_distance_clumping(
        locus_expr=ht.locus,
        alleles_expr=ht.alleles,
        pvalue_expr=ht.Pvalue,
        radius=args.radius,
        nlog_p=False,
        merge_overlapping_loci=not args.not_merge_overlapping_loci,
    )
)