# Purpose:

2014-12-26 (Friday)

Create code to make tables with info from `argot2` annotations for all genes in a gene-list.

# Implementation:

## Imports:

In [79]:
# imports
from collections import defaultdict

import pandas as pd

import munch

from IPython.display import display, HTML

from spartan.utils.annotations.ensembl.gff3 import parse_gff3_attributes
from spartan.utils.files import tableFile2namedTuple

## File paths:

In [2]:
# define paths to files
# Every name in this cell holds a path *string* built by concatenating onto `bpth`.
# NOTE(review): `bpth` is an absolute path under one user's home directory — adjust it
# before running on another machine.
bpth = "/home/gus/remote_mounts/louise/data/"

## basic genome files
# HDF5 store with the argot2 annotation table (read with pd.read_hdf, key 'dataframe').
fanno = bpth + "genomes/glossina_fuscipes/annotations/functional/GfusI1.1_pre/argot2_out/argot_functional_annotations_ts150.h5"

## project specific files
# NOTE: the four names below are rebound further down (in the "load SNP search data" cell)
# from these path strings to the parsed results of bed3_v_gff3_window_by_gff3_ID(), so
# this cell must be re-run before that one whenever that one is re-executed.
top1_environment = bpth + "projects/ddrad58/SNPs_of_interest/genes_near_SNPs/tsetseFINAL_14Oct2014_f2_53_v_GfusI1.1.window.5000.top1_env.tsv"
top1_infection = bpth + "projects/ddrad58/SNPs_of_interest/genes_near_SNPs/tsetseFINAL_14Oct2014_f2_53_v_GfusI1.1.window.5000.top1_infection.tsv"

top5_environment = bpth + "projects/ddrad58/SNPs_of_interest/genes_near_SNPs/tsetseFINAL_14Oct2014_f2_53_v_GfusI1.1.window.5000.top5_env.tsv"
top5_infection = bpth + "projects/ddrad58/SNPs_of_interest/genes_near_SNPs/tsetseFINAL_14Oct2014_f2_53_v_GfusI1.1.window.5000.top5_infection.tsv"



## Set up main data variables:

### Set up annotation database:

In [3]:
argot2 = pd.read_hdf(path_or_buf=fanno, key='dataframe')

In [4]:
argot2.head()

Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
0,GFUI034947-PA,P,GO:0006508,proteolysis,270.313447,0.496543,8.247696
1,GFUI035874-PA,F,GO:0005515,protein binding,529.038456,0.5,5.471582
2,GFUI033625-PA,P,GO:0005980,glycogen catabolic process,307.758251,0.284597,13.42445
3,GFUI033625-PA,F,GO:0004134,4-alpha-glucanotransferase activity,159.513252,0.177063,10.153643
4,GFUI033625-PA,F,GO:0004135,"amylo-alpha-1,6-glucosidase activity",184.352303,0.177063,11.734746


### Set up gene/SNP relationship data:

#### Function to create dictionary-based retrieval object for gene/SNP data:

In [33]:
def bed3_v_gff3_window_by_gff3_ID(bedtools_window_out):
    """
    Returns dictionary-based retrieval object for Bedtools result from 3-field BED format records vs
    GFF3 format records data grouped by the GFF3 features.

    Args:
        bedtools_window_out: tab-separated table to read; handed unchanged to
            `tableFile2namedTuple`. Each row must provide the 3 BED columns followed
            by the 9 GFF3 columns, in that order.

    Returns:
        The result of `munch.munchify` applied to a dict keyed by the `ID` value parsed
        from each row's GFF3 attributes column. Every value holds two entries:

        * ``info``      -- the GFF3 columns (seq, source, type, start, end, score, strand,
                           phase, attributes) taken from the FIRST row seen for that ID.
        * ``bed3_hits`` -- a list with one ``{seq, start, end}`` entry per row that
                           referenced that ID, in the order the rows were read.

        Column values are stored exactly as `tableFile2namedTuple` yields them; no type
        conversion happens here.

    Raises:
        KeyError: presumably, when the parsed attributes of a row contain no ``ID`` entry
            (depends on what `parse_gff3_attributes` returns -- TODO confirm).
    """
    bed3_fields = ("seq", "start", "end")
    gff3_fields = ("seq", "source", "type", "start", "end",
                   "score", "strand", "phase", "attributes")

    # Column names for the table: BED columns first, then the GFF3 columns.
    headers = (["bed3_" + field for field in bed3_fields] +
               ["gff3_" + field for field in gff3_fields])

    table = tableFile2namedTuple(bedtools_window_out,
                                 sep='\t',
                                 headers=headers)

    data = {}

    for row in table:
        gff3_id = parse_gff3_attributes(row.gff3_attributes)['ID']

        record = data.get(gff3_id)
        if record is None:
            # First time this GFF3 feature is seen: remember its description once and
            # start an empty hit list (later rows for the same ID only add hits).
            record = {
                'info': dict((field, getattr(row, "gff3_" + field))
                             for field in gff3_fields),
                'bed3_hits': [],
            }
            data[gff3_id] = record

        record['bed3_hits'].append(dict((field, getattr(row, "bed3_" + field))
                                        for field in bed3_fields))

    return munch.munchify(data)

In [51]:
top5_environment_search = bed3_v_gff3_window_by_gff3_ID(top5_environment)

In [35]:
len(top5_environment_search)

83

In [36]:
# list() keeps this working where .keys() returns a view that cannot be sliced (Python 3)
list(top5_environment_search.keys())[:3]

['GFUI003362', 'GFUI017236', 'GFUI022267']

In [37]:
top5_environment_search['GFUI003362']['bed3_hits']

[Munch(end='164021', seq='Scaffold109', start='164020')]

In [43]:
top5_environment_search['GFUI003362'].bed3_hits

[Munch(end='164021', seq='Scaffold109', start='164020')]

## Create tables for our genes that reference the SNP location they were associated with:

In [47]:
class ListTable(list):
    """ List subclass for 2-dimensional data such as [[1,2,3],[4,5,6]] that
        IPython Notebook renders as an HTML table; the first inner list is
        rendered as the header row. """

    def _repr_html_(self):
        """ Return the table markup as a single string with no whitespace between tags.
            Cell values are inserted with str.format() and are not HTML-escaped. """
        pieces = ["<table>"]

        for row_number, cells in enumerate(self):
            # only the very first row is a header row
            cell_template = "<th>{0}</th>" if row_number == 0 else "<td>{0}</td>"
            rendered_cells = "".join(cell_template.format(cell) for cell in cells)
            pieces.append("<tr>" + rendered_cells + "</tr>")

        pieces.append("</table>")
        return ''.join(pieces)

In [52]:
# load SNP search data
# NOTE: each name on the left initially holds the *path string* defined in the
# "File paths" cell and is rebound here to the parsed result. That makes this cell
# non-idempotent: running it a second time (or running an earlier cell that still
# expects a path, e.g. the one building `top5_environment_search`) passes a parsed
# object where a file path is expected. Re-run the "File paths" cell first.
top1_environment = bed3_v_gff3_window_by_gff3_ID(top1_environment)
top1_infection   = bed3_v_gff3_window_by_gff3_ID(top1_infection)

top5_environment = bed3_v_gff3_window_by_gff3_ID(top5_environment)
top5_infection   = bed3_v_gff3_window_by_gff3_ID(top5_infection)

In [87]:
top5_environment.GFUI003362.bed3_hits

[Munch(end='164021', seq='Scaffold109', start='164020')]

In [55]:
def filter_by_gene(gene, argot_df):
    """Return the rows of `argot_df` whose `Sequence` value starts with `gene`."""
    is_gene_row = argot_df["Sequence"].str.startswith(gene)
    return argot_df[is_gene_row]

In [59]:
def sort_by_TS(argot_df):
    """
    Return a copy of `argot_df` with rows ordered by `Total Score`, highest first.

    `DataFrame.sort` no longer exists in current pandas, so `sort_values` is used
    whenever the installed pandas provides it; the old method is kept only as a
    fallback for the pandas versions that predate `sort_values`.
    """
    if hasattr(argot_df, 'sort_values'):
        return argot_df.sort_values('Total Score', ascending=False)

    # legacy pandas without sort_values
    return argot_df.sort('Total Score', ascending=False)

In [61]:
def filter_by_TS(tscore, argot_df):
    """Return the rows of `argot_df` whose `Total Score` is at least `tscore`."""
    meets_threshold = argot_df["Total Score"].ge(tscore)
    return argot_df[meets_threshold]

In [81]:
def gene_table(gene_name, argot_df, tscore):
    """
    Build the annotation table for one gene: keep the rows of `argot_df` that belong to
    `gene_name`, drop those scoring below `tscore`, and return the rest sorted by score.
    """
    gene_rows = filter_by_gene(gene_name, argot_df)
    scored_rows = filter_by_TS(tscore, gene_rows)
    return sort_by_TS(scored_rows)

In [74]:
def vb_link(gene_name, link_template):
    """
    Substitute `gene_name` for every ``%(gene_name)s`` placeholder in `link_template`
    and return the result wrapped in an IPython `HTML` display object.
    """
    substitutions = {'gene_name': gene_name}
    markup = link_template % substitutions
    return HTML(markup)

In [89]:
def format_snps(snp_list):
    """
    Render SNP hits as HTML text, one ``<seq>:<end>`` entry per hit.

    Each item of `snp_list` must be a mapping with `seq` and `end` keys. Entries are
    separated by ``<br>`` and the result always ends with a ``<br>`` (so an empty
    `snp_list` yields just ``<br>``).
    """
    line_break = "<br>"
    entries = ["%(seq)s:%(end)s" % hit for hit in snp_list]
    return line_break.join(entries) + line_break

In [83]:
# Vectorbase Link Templates
# Each template is an HTML anchor containing `%(gene_name)s` placeholders (in both the URL
# and the visible link text); vb_link() fills them in with mapping-style `%` formatting.
# NOTE(review): the query string of `gene_region_detail` ends in a bare `;r` with no
# value -- confirm the link still resolves to the intended genome-browser region.
gene_page          = '''<a href="https://www.vectorbase.org/Glossina_fuscipes/Gene/Summary?db=core;g=%(gene_name)s"> %(gene_name)s: gene home page</a>'''
protein_summary    = '''<a href="https://www.vectorbase.org/Glossina_fuscipes/Transcript/ProteinSummary?db=core;g=%(gene_name)s"> %(gene_name)s: protein summary </a>'''
gene_ontology      = '''<a href="https://www.vectorbase.org/Glossina_fuscipes/Gene/Ontology/molecular_function?db=core;g=%(gene_name)s;oid=molecular_function"> %(gene_name)s: gene ontology </a>'''
gene_region_detail = '''<a href="https://www.vectorbase.org/Glossina_fuscipes/Location/View?db=core;g=%(gene_name)s;r"> %(gene_name)s: genome browser </a>'''

In [93]:
def present_gene(gene_name, nearby_snps, argot_df=None, tscore=200):
    """
    Display a notebook report for one gene: heading, nearby SNP locations, its
    annotation table, and the four VectorBase links, followed by a horizontal rule.

    Args:
        gene_name: gene identifier; used for the heading, to select annotation rows,
            and to fill the link templates.
        nearby_snps: iterable of mappings with `seq` and `end` keys (see format_snps).
        argot_df: annotation table to report from. When omitted, the notebook-level
            `argot2` table is used (the previous, hard-wired behaviour).
        tscore: minimum `Total Score` for an annotation row to be shown (default 200,
            the previously hard-coded threshold).

    Returns:
        None; everything is emitted through IPython's `display`.
    """
    if argot_df is None:
        # Resolved at call time so the function can be defined before `argot2` is loaded.
        argot_df = argot2

    display(HTML("<h2>%s</h2>" % (gene_name)))

    nearby_snps = format_snps(nearby_snps)
    display(HTML("<b>SNP location:</b><br>%s" % (nearby_snps)))

    display(gene_table(gene_name=gene_name,
                       argot_df=argot_df,
                       tscore=tscore))

    display(HTML("<p>"))
    for link_template in (gene_page, protein_summary, gene_ontology, gene_region_detail):
        display(vb_link(gene_name=gene_name, link_template=link_template))
    display(HTML("<hr>"))

# Functional Annotation of the genes associated with the top 5 environment SNPs:

In [94]:
# one report per gene, each listing the SNP hits recorded for that gene
for gene, gene_record in top5_environment.items():
    present_gene(gene_name=gene, nearby_snps=gene_record.bed3_hits)
    

Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
61661,GFUI025369-PA,P,GO:0050909,sensory perception of taste,4494.034613,0.420161,14.238183
61666,GFUI025369-PA,C,GO:0016021,integral component of membrane,758.37787,0.321204,3.692502
61664,GFUI025369-PA,C,GO:0005886,plasma membrane,702.268317,0.284545,4.357169
61663,GFUI025369-PA,F,GO:0008527,taste receptor activity,499.685908,0.350345,12.95208
61665,GFUI025369-PA,C,GO:0016020,membrane,398.543846,0.890289,2.472824
61662,GFUI025369-PA,F,GO:0004871,signal transducer activity,232.009897,0.818926,5.118474


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
31310,GFUI031969-PA,P,GO:0006457,protein folding,438.074284,0.08754,9.862501
31311,GFUI031969-PA,F,GO:0051082,unfolded protein binding,246.971872,0.101525,7.402828


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
16676,GFUI040028-PA,C,GO:0032580,Golgi cisterna membrane,5454.162533,0.263156,11.587434
16675,GFUI040028-PA,F,GO:0008376,acetylgalactosaminyltransferase activity,4011.619006,0.382311,12.188692
16674,GFUI040028-PA,P,GO:0030206,chondroitin sulfate biosynthetic process,531.080335,0.130495,17.190854
16673,GFUI040028-PA,P,GO:0008152,metabolic process,244.283911,0.665907,0.544703


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
40513,GFUI017230-PA,P,GO:0015937,coenzyme A biosynthetic process,200.189044,0.171575,11.339269


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
30412,GFUI031964-PA,P,GO:0009113,purine nucleobase biosynthetic process,5491.021276,0.177935,11.587483
30420,GFUI031964-PA,C,GO:0005737,cytoplasm,4674.934743,0.701739,3.178476
30411,GFUI031964-PA,P,GO:0006189,'de novo' IMP biosynthetic process,2867.784821,0.189605,10.660827
30410,GFUI031964-PA,P,GO:0006164,purine nucleotide biosynthetic process,1986.856174,0.369262,8.746778
30415,GFUI031964-PA,F,GO:0005524,ATP binding,651.975366,0.107022,4.006505
30417,GFUI031964-PA,F,GO:0046872,metal ion binding,501.280697,0.225829,3.508837
30413,GFUI031964-PA,P,GO:0009058,biosynthetic process,494.358948,0.662878,3.224683
30414,GFUI031964-PA,F,GO:0000166,nucleotide binding,373.2026,0.212997,2.468432
30419,GFUI031964-PA,F,GO:0003824,catalytic activity,345.605884,0.546336,1.410349
30418,GFUI031964-PA,F,GO:0030145,manganese ion binding,324.756461,0.058359,7.208685


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
29956,GFUI031966-PA,P,GO:0016192,vesicle-mediated transport,7400.862963,0.388379,9.503609
29957,GFUI031966-PA,C,GO:0016021,integral component of membrane,2956.820968,0.388879,3.692502
29955,GFUI031966-PA,P,GO:0006810,transport,2421.413744,0.787732,5.069855


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
51722,GFUI043720-PA,P,GO:0006470,protein dephosphorylation,2077.021281,0.236174,11.013535
51725,GFUI043720-PA,P,GO:0035335,peptidyl-tyrosine dephosphorylation,1854.192243,0.104459,11.690944
51724,GFUI043720-PA,P,GO:0016311,dephosphorylation,1327.332234,0.334633,9.783756
51729,GFUI043720-PA,F,GO:0004725,protein tyrosine phosphatase activity,1106.817502,0.176805,8.510896
51726,GFUI043720-PA,F,GO:0016787,hydrolase activity,1088.596949,0.857645,2.643033
51730,GFUI043720-PA,F,GO:0008138,protein tyrosine/serine/threonine phosphatase ...,904.445986,0.147903,10.023433
51728,GFUI043720-PA,F,GO:0004721,phosphoprotein phosphatase activity,837.540183,0.541206,7.88706
51727,GFUI043720-PA,F,GO:0016791,phosphatase activity,663.461735,0.703193,6.651616
51733,GFUI043720-PA,C,GO:0005634,nucleus,555.072824,0.429894,5.725783
51732,GFUI043720-PA,C,GO:0005737,cytoplasm,497.84608,0.152037,3.178476


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
31340,GFUI027643-PA,F,GO:0019904,protein domain specific binding,9672.974692,0.327969,9.857636
31341,GFUI027643-PA,C,GO:0005737,cytoplasm,1363.374577,0.478286,3.178476
31338,GFUI027643-PA,P,GO:0006605,protein targeting,1012.7697,0.096342,10.245803
31342,GFUI027643-PA,C,GO:0005634,nucleus,499.670055,0.089482,5.725783
31344,GFUI027643-PA,C,GO:0070062,extracellular vesicular exosome,499.264422,0.08972,8.572522
31339,GFUI027643-PA,F,GO:0005515,protein binding,488.165307,0.88836,5.471582
31343,GFUI027643-PA,C,GO:0042470,melanosome,254.58166,0.053257,12.405744


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
28375,GFUI031821-PA,P,GO:0042744,hydrogen peroxide catabolic process,763.121662,0.28037,12.73498
28376,GFUI031821-PA,P,GO:0006979,response to oxidative stress,558.429852,0.560932,9.299953
28378,GFUI031821-PA,F,GO:0004096,catalase activity,341.530977,0.165732,9.327078
28379,GFUI031821-PA,F,GO:0004601,peroxidase activity,267.707446,0.33143,7.315387


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
30235,GFUI031730-PA,C,GO:0016020,membrane,268.385576,0.462748,2.472824


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
53910,GFUI012131-PA,C,GO:0005737,cytoplasm,1949.581495,0.545487,3.178476
53907,GFUI012131-PA,P,GO:0031532,actin cytoskeleton reorganization,1245.065606,0.085014,14.592686
53908,GFUI012131-PA,P,GO:0008360,regulation of cell shape,887.60299,0.0887,10.016157
53909,GFUI012131-PA,P,GO:0007165,signal transduction,416.501526,0.09102,7.045163


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
53724,GFUI012127-PA,P,GO:0005975,carbohydrate metabolic process,436.848855,0.919457,6.570816


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
17165,GFUI040907-PA,P,GO:0045893,"positive regulation of transcription, DNA-temp...",2090.727684,0.178001,11.311674
17172,GFUI040907-PA,F,GO:0003713,transcription coactivator activity,1739.057158,0.456333,10.30776
17174,GFUI040907-PA,C,GO:0000124,SAGA complex,1342.735308,0.293517,12.474737
17166,GFUI040907-PA,P,GO:0016568,chromatin modification,1132.336165,0.287829,11.182522
17167,GFUI040907-PA,P,GO:0016578,histone deubiquitination,781.913382,0.11641,15.939007
17171,GFUI040907-PA,F,GO:0008270,zinc ion binding,705.260729,0.244602,5.400313
17173,GFUI040907-PA,F,GO:0030374,ligand-dependent nuclear receptor transcriptio...,509.824142,0.191065,12.400847
17176,GFUI040907-PA,C,GO:0071819,DUBm complex,465.209855,0.16771,13.557214
17170,GFUI040907-PA,F,GO:0046872,metal ion binding,459.235591,0.489532,3.508837
17169,GFUI040907-PA,P,GO:0006355,"regulation of transcription, DNA-templated",430.668044,0.286174,7.524266


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
58179,GFUI012133-PA,F,GO:0003735,structural constituent of ribosome,7726.333005,0.4538,5.691408
58177,GFUI012133-PA,P,GO:0006412,translation,4932.573498,0.531071,7.706853
58178,GFUI012133-PA,F,GO:0000166,nucleotide binding,2714.474945,0.376855,2.468432
58182,GFUI012133-PA,C,GO:0005840,ribosome,1485.478141,0.361607,5.343685
58183,GFUI012133-PA,C,GO:0030529,ribonucleoprotein complex,1182.538182,0.557423,4.674508
58180,GFUI012133-PA,C,GO:0005622,intracellular,949.22038,0.930388,1.978874
58181,GFUI012133-PA,C,GO:0022627,cytosolic small ribosomal subunit,455.43898,0.096004,11.656136
58176,GFUI012133-PA,P,GO:0000462,maturation of SSU-rRNA from tricistronic rRNA ...,393.342578,0.095409,16.008262


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
65847,GFUI025370-PA,P,GO:0050909,sensory perception of taste,4693.718356,0.40573,14.238183
65848,GFUI025370-PA,C,GO:0005886,plasma membrane,736.681997,0.285953,4.357169
65850,GFUI025370-PA,C,GO:0016021,integral component of membrane,733.780257,0.31001,3.692502
65849,GFUI025370-PA,C,GO:0016020,membrane,423.05616,0.88361,2.472824


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
53498,GFUI015483-PA,P,GO:0006508,proteolysis,3547.837752,0.183137,8.247696
53501,GFUI015483-PA,F,GO:0004252,serine-type endopeptidase activity,1280.255292,0.17087,7.037845
53499,GFUI015483-PA,F,GO:0016787,hydrolase activity,940.492831,0.647234,2.643033
53503,GFUI015483-PA,F,GO:0008236,serine-type peptidase activity,807.216196,0.327086,6.338852
53500,GFUI015483-PA,F,GO:0003824,catalytic activity,591.472295,0.82794,1.410349
53502,GFUI015483-PA,F,GO:0008233,peptidase activity,512.027869,0.482263,4.436037
53504,GFUI015483-PA,C,GO:0005576,extracellular region,335.567786,0.388421,3.024034


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
42160,GFUI021051-PA,F,GO:0004910,"interleukin-1, Type II, blocking receptor acti...",383.045002,0.241302,15.367774
42159,GFUI021051-PA,F,GO:0004908,interleukin-1 receptor activity,327.471351,0.482605,13.138158


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
56244,GFUI013780-PA,C,GO:0005634,nucleus,2101.57514,0.674396,5.725783
56243,GFUI013780-PA,F,GO:0046983,protein dimerization activity,870.246329,0.18021,6.942763
56242,GFUI013780-PA,F,GO:0003677,DNA binding,425.597921,0.46113,3.83101
56240,GFUI013780-PA,P,GO:0007517,muscle organ development,383.978856,0.155589,12.579326
56241,GFUI013780-PA,P,GO:0006351,"transcription, DNA-templated",219.109867,0.179396,6.856772


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
31399,GFUI030010-PA,C,GO:0016020,membrane,678.524157,0.747154,2.472824
31397,GFUI030010-PA,F,GO:0003824,catalytic activity,574.037413,0.857252,1.410349
31398,GFUI030010-PA,F,GO:0016787,hydrolase activity,334.822471,0.58564,2.643033
31400,GFUI030010-PA,C,GO:0016021,integral component of membrane,274.010819,0.257963,3.692502


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
21796,GFUI050260-PA,P,GO:0007339,binding of sperm to zona pellucida,1043.953299,0.556041,15.898589
21795,GFUI050260-PA,P,GO:0001675,acrosome assembly,534.626246,0.43447,17.067389
21797,GFUI050260-PA,C,GO:0001669,acrosomal vesicle,352.336524,0.189461,12.029073


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
45151,GFUI018582-PA,F,GO:0003676,nucleic acid binding,675.005734,0.718406,3.285748
45150,GFUI018582-PA,P,GO:0031047,gene silencing by RNA,461.742948,0.159529,12.963179
45153,GFUI018582-PA,C,GO:0005737,cytoplasm,209.601443,0.506259,3.178476
45152,GFUI018582-PA,C,GO:0043186,P granule,200.303104,0.175656,13.159434


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
62224,GFUI026161-PA,F,GO:0003998,acylphosphatase activity,8026.945865,0.414146,10.560419
62223,GFUI026161-PA,F,GO:0016787,hydrolase activity,1322.473586,0.744548,2.643033
62222,GFUI026161-PA,P,GO:0008152,metabolic process,648.61106,0.902929,0.544703
62225,GFUI026161-PA,C,GO:0005739,mitochondrion,358.489468,0.425272,5.085303
62226,GFUI026161-PA,C,GO:0070062,extracellular vesicular exosome,323.012844,0.315554,8.572522


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
65148,GFUI024781-PA,C,GO:0005634,nucleus,10621.86727,0.755397,5.725783
65147,GFUI024781-PA,F,GO:0046983,protein dimerization activity,3304.854129,0.207545,6.942763
65146,GFUI024781-PA,F,GO:0003677,DNA binding,2656.653352,0.286525,3.83101
65136,GFUI024781-PA,P,GO:0006355,"regulation of transcription, DNA-templated",2501.035698,0.251898,7.524266
65143,GFUI024781-PA,P,GO:0006351,"transcription, DNA-templated",2280.250477,0.366415,6.856772
65144,GFUI024781-PA,F,GO:0046872,metal ion binding,2027.903147,0.222844,3.508837
65139,GFUI024781-PA,P,GO:0007275,multicellular organismal development,1176.49627,0.417411,7.690822
65138,GFUI024781-PA,P,GO:0045893,"positive regulation of transcription, DNA-temp...",542.119265,0.097036,11.311674
65137,GFUI024781-PA,P,GO:0045944,positive regulation of transcription from RNA ...,421.962369,0.050044,12.597549
65145,GFUI024781-PA,F,GO:0000978,RNA polymerase II core promoter proximal regio...,295.782417,0.034582,11.041436


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
21803,GFUI052519-PA,C,GO:0005886,plasma membrane,2455.3519,0.403988,4.357169
21802,GFUI052519-PA,F,GO:0004871,signal transducer activity,2451.477881,0.761515,5.118474
21800,GFUI052519-PA,F,GO:0004930,G-protein coupled receptor activity,1657.904329,0.565223,7.732231
21799,GFUI052519-PA,P,GO:0007186,G-protein coupled receptor signaling pathway,1221.37985,0.197973,10.590143
21805,GFUI052519-PA,C,GO:0016021,integral component of membrane,1164.371552,0.342725,3.692502
21798,GFUI052519-PA,P,GO:0007165,signal transduction,1009.820687,0.320773,7.045163
21804,GFUI052519-PA,C,GO:0016020,membrane,585.449977,0.799132,2.472824
21801,GFUI052519-PA,F,GO:0004989,octopamine receptor activity,350.337777,0.05641,15.13725


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
30858,GFUI031959-PA,C,GO:0030288,outer membrane-bounded periplasmic space,223.953617,0.438588,7.155051


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
61530,GFUI022296-PA,F,GO:0003723,RNA binding,914.840481,0.52376,4.494763
61531,GFUI022296-PA,C,GO:0005634,nucleus,377.710349,0.497151,5.725783


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
57790,GFUI013391-PA,F,GO:0008168,methyltransferase activity,1826.916882,0.240918,4.628874
57788,GFUI013391-PA,P,GO:0032259,methylation,1159.736404,0.180507,8.301193
57789,GFUI013391-PA,F,GO:0016740,transferase activity,972.749715,0.432009,2.4874
57787,GFUI013391-PA,P,GO:0010629,negative regulation of gene expression,596.987167,0.096247,9.607679
57791,GFUI013391-PA,C,GO:0005634,nucleus,419.091132,0.332384,5.725783


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
56166,GFUI013397-PA,C,GO:0005634,nucleus,368.885066,0.362875,5.725783
56164,GFUI013397-PA,F,GO:0016740,transferase activity,343.930634,0.413165,2.4874
56165,GFUI013397-PA,F,GO:0008168,methyltransferase activity,341.734384,0.240991,4.628874
56163,GFUI013397-PA,P,GO:0032259,methylation,289.936481,0.161049,8.301193


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
62603,GFUI026165-PA,F,GO:0003713,transcription coactivator activity,1474.479766,0.203888,10.30776
62604,GFUI026165-PA,C,GO:0005634,nucleus,1332.088863,0.538393,5.725783


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
33802,GFUI007439-PA,F,GO:0008017,microtubule binding,979.075875,0.309808,9.436151
33803,GFUI007439-PA,F,GO:0051010,microtubule plus-end binding,552.26475,0.120057,13.296078
33806,GFUI007439-PA,C,GO:0005737,cytoplasm,490.308087,0.392804,3.178476
33805,GFUI007439-PA,C,GO:0000776,kinetochore,260.023142,0.027808,10.281686
33804,GFUI007439-PA,C,GO:0005874,microtubule,248.222152,0.201705,8.766752


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
27734,GFUI027255-PA,F,GO:0008199,ferric iron binding,3520.667148,0.294476,8.65978
27735,GFUI027255-PA,C,GO:0005739,mitochondrion,1119.291606,0.695381,5.085303


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content


Unnamed: 0,Sequence,Aspect,GO ID,Name,Total Score,Internal Confidence,Information Content
16830,GFUI041872-PA,F,GO:0004896,cytokine receptor activity,259.772571,0.563888,10.488532


# Functional Annotation of the genes associated with the top 5 environment SNPs: