# Add some additional layers of information to the hierarchy

Requires DDOT for ontology parsing https://github.com/michaelkyu/ddot/blob/master/examples/Tutorial.ipynb

In [1]:
# load required packages

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import pandas as pd
import random

import ndex2
import getpass

from IPython.display import display

# latex rendering of text in graphs
import matplotlib as mpl
mpl.rc('text', usetex = False)
mpl.rc('font', family = 'serif')

from matplotlib import rcParams
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Arial']

sns.set(font_scale=1.4)

sns.set_style('white')

sns.set_style("ticks", {"xtick.major.size": 15, "ytick.major.size": 15})
plt.rcParams['svg.fonttype'] = 'none'

import sys

# % matplotlib inline

In [2]:
# need ddot to parse the ontology
import ddot
from ddot import Ontology

In [3]:
# ddot only compatible with nx 1.11....
nx.__version__

'1.11'

# Load interactome (needs to be the same one used in previous analysis)

In [61]:

interactome_uuid='4de852d9-9908-11e9-bcaf-0ac135e8bacf'
ndex_server='public.ndexbio.org'
ndex_user=None
ndex_password=None
G_PC = ndex2.create_nice_cx_from_server(
            ndex_server, 
            username=ndex_user, 
            password=ndex_password, 
            uuid=interactome_uuid
        ).to_networkx()

# relabel nodes.... this is a nx 1.11 thing...
G_PC_data = pd.DataFrame.from_dict(dict(G_PC.nodes(data=True)), orient='index')
G_PC = nx.relabel_nodes(G_PC,dict(zip(G_PC_data.index.tolist(),G_PC_data['name'].tolist())))
print(G_PC.nodes()[0:5])
nodes = G_PC.nodes()

# print out interactome num nodes and edges for diagnostic purposes
print('number of nodes:')
print(len(G_PC.nodes()))
print('\nnumber of edges:')
print(len(G_PC.edges()))

['UBE2Q1', 'RNF14', 'UBE2Q2', 'TMCO1', 'UBAC1']
number of nodes:
18820

number of edges:
2693250


# Load ASD rare-common hierarchy from ndex


In [94]:
ASD_uuid='0ecb653c-9405-11eb-9e72-0ac135e8bacf'
ndex_server='public.ndexbio.org'
ndex_user='sbrosenthal@health.ucsd.edu'
ndex_password=getpass.getpass('NDEx password: ')
G_ASD = ndex2.create_nice_cx_from_server(
            ndex_server, 
            username=ndex_user, 
            password=ndex_password, 
            uuid=ASD_uuid
        ).to_networkx()
nodes = G_ASD.nodes()

# print out interactome num nodes and edges for diagnostic purposes
print('number of nodes:')
print(len(G_ASD.nodes()))
print('\nnumber of edges:')
print(len(G_ASD.edges()))

NDEx password: ········
number of nodes:
73

number of edges:
74


In [95]:
G_ASD.nodes(data=True)[0:2]

[(6548,
  {'CD_MemberList': 'NSG2 PSD2 SLC17A6 STXBP5L',
   'CD_AnnotatedMembers_Size': '1',
   'CD_AnnotatedMembers_Pvalue': '0.007995642253764',
   'CD_AnnotatedMembers': 'SLC17A6',
   'CD_CommunityName': 'Organic anion transporters Homo sapiens R-HSA-428643',
   'CD_AnnotatedMembers_Overlap': '0.25',
   'HiDeF_persistence': '5',
   'CD_MemberList_LogSize': '2.0',
   'CD_Labeled': 'true',
   'CD_AnnotatedAlgorithm': 'Annotated by Enrichr [Docker: coleslawndex/cdenrichrgenestoterm:0.4.0] {{--maxpval=0.05, --genesets=GO_Biological_Process_2018,GO_Cellular_Component_2018,GO_Molecular_Function_2018,KEGG_2019_Human,Reactome_2016}} via CyCommunityDetection Cytoscape App (1.11.0)',
   'CD_NonAnnotatedMembers': 'NSG2 PSD2 STXBP5L',
   'CD_AnnotatedMembers_SourceDB': 'Reactome_2016',
   'CD_MemberList_Size': '4',
   'name': 'C435'}),
 (6547,
  {'CD_MemberList': 'EN1 GSX2 IRX3 NKX6-2',
   'CD_AnnotatedMembers_Size': '1',
   'CD_AnnotatedMembers_Pvalue': '0.0249546954540196',
   'CD_AnnotatedMe

In [108]:
# add node attributes to dataframe for easier access
ASD_clust_df = pd.DataFrame.from_dict(dict(G_ASD.nodes(data=True)), orient='index')
# relabel nodes in G_ASD
G_ASD=nx.relabel_nodes(G_ASD,dict(ASD_clust_df['name']))
ASD_clust_df.index = ASD_clust_df['name']
# some columns are not the right type
ASD_clust_df['CD_MemberList_Size']=[int(x) for x in ASD_clust_df['CD_MemberList_Size'].tolist()]
ASD_clust_df['HiDeF_persistence']=[int(x) for x in ASD_clust_df['HiDeF_persistence'].tolist()]
ASD_clust_df.head()

Unnamed: 0_level_0,CD_MemberList,CD_AnnotatedMembers_Size,CD_AnnotatedMembers_Pvalue,CD_AnnotatedMembers,CD_CommunityName,CD_AnnotatedMembers_Overlap,HiDeF_persistence,CD_MemberList_LogSize,CD_Labeled,CD_AnnotatedAlgorithm,CD_NonAnnotatedMembers,CD_AnnotatedMembers_SourceDB,CD_MemberList_Size,name
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
C435,NSG2 PSD2 SLC17A6 STXBP5L,1,0.007995642253764,SLC17A6,Organic anion transporters Homo sapiens R-HSA-...,0.25,5,2.0,True,Annotated by Enrichr [Docker: coleslawndex/cde...,NSG2 PSD2 STXBP5L,Reactome_2016,4,C435
C434,EN1 GSX2 IRX3 NKX6-2,1,0.0249546954540196,NKX6-2,regulation of myelination (GO:0031641),0.25,5,2.0,True,Annotated by Enrichr [Docker: coleslawndex/cde...,EN1 GSX2 IRX3,GO_Biological_Process_2018,4,C434
C433,CALM3 FSD1 GNAZ SRC,2,0.0034578149777732,SRC CALM3,GnRH signaling pathway,0.5,5,2.0,True,Annotated by Enrichr [Docker: coleslawndex/cde...,FSD1 GNAZ,KEGG_2019_Human,4,C433
C432,ABCC8 CDK5R2 KIF1A MKRN3,1,0.0137533444679581,ABCC8,ABC transporters,0.25,10,2.0,True,Annotated by Enrichr [Docker: coleslawndex/cde...,CDK5R2 KIF1A MKRN3,KEGG_2019_Human,4,C432
C431,KIF1A MKRN3 TBC1D14 VPS41,2,0.0016952190930884,VPS41 TBC1D14,regulation of vesicle fusion (GO:0031338),0.5,7,2.0,True,Annotated by Enrichr [Docker: coleslawndex/cde...,KIF1A MKRN3,GO_Biological_Process_2018,4,C431


# Add some annotation the the systems 

- Fraction rare/common/neither seeds


## Do we need to pull down the other network for this?

In [109]:
ASD_clust_df.head()

Unnamed: 0_level_0,CD_MemberList,CD_AnnotatedMembers_Size,CD_AnnotatedMembers_Pvalue,CD_AnnotatedMembers,CD_CommunityName,CD_AnnotatedMembers_Overlap,HiDeF_persistence,CD_MemberList_LogSize,CD_Labeled,CD_AnnotatedAlgorithm,CD_NonAnnotatedMembers,CD_AnnotatedMembers_SourceDB,CD_MemberList_Size,name
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
C435,NSG2 PSD2 SLC17A6 STXBP5L,1,0.007995642253764,SLC17A6,Organic anion transporters Homo sapiens R-HSA-...,0.25,5,2.0,True,Annotated by Enrichr [Docker: coleslawndex/cde...,NSG2 PSD2 STXBP5L,Reactome_2016,4,C435
C434,EN1 GSX2 IRX3 NKX6-2,1,0.0249546954540196,NKX6-2,regulation of myelination (GO:0031641),0.25,5,2.0,True,Annotated by Enrichr [Docker: coleslawndex/cde...,EN1 GSX2 IRX3,GO_Biological_Process_2018,4,C434
C433,CALM3 FSD1 GNAZ SRC,2,0.0034578149777732,SRC CALM3,GnRH signaling pathway,0.5,5,2.0,True,Annotated by Enrichr [Docker: coleslawndex/cde...,FSD1 GNAZ,KEGG_2019_Human,4,C433
C432,ABCC8 CDK5R2 KIF1A MKRN3,1,0.0137533444679581,ABCC8,ABC transporters,0.25,10,2.0,True,Annotated by Enrichr [Docker: coleslawndex/cde...,CDK5R2 KIF1A MKRN3,KEGG_2019_Human,4,C432
C431,KIF1A MKRN3 TBC1D14 VPS41,2,0.0016952190930884,VPS41 TBC1D14,regulation of vesicle fusion (GO:0031338),0.5,7,2.0,True,Annotated by Enrichr [Docker: coleslawndex/cde...,KIF1A MKRN3,GO_Biological_Process_2018,4,C431


# System validation with mammalian phenotype ontology + mouse KO data

Parse the ontology, data from http://www.informatics.jax.org/vocab/mp_ontology

In [18]:
from scipy.stats import hypergeom
from statsmodels.stats import contingency_tables

In [6]:
mgi_df = pd.read_csv('../data/MGI/MGI_PhenoGenoMP.rpt',sep='\t',
                    names=['MGI_Allele_Accession_ID','Allele symbol','involves','MP','PMID','MGI_marker_accession_ID'])
gene_name = [a.split('<')[0] for a in mgi_df['Allele symbol'].tolist()]
mgi_df['gene_name']=gene_name
mgi_df.index=mgi_df['gene_name']
mgi_df.head()

Unnamed: 0_level_0,MGI_Allele_Accession_ID,Allele symbol,involves,MP,PMID,MGI_marker_accession_ID,gene_name
gene_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Rb1,Rb1<tm1Tyj>/Rb1<tm1Tyj>,Rb1<tm1Tyj>,involves: 129S2/SvPas,MP:0000600,12529408,MGI:97874,Rb1
Rb1,Rb1<tm1Tyj>/Rb1<tm1Tyj>,Rb1<tm1Tyj>,involves: 129S2/SvPas,MP:0001716,16449662,MGI:97874,Rb1
Rb1,Rb1<tm1Tyj>/Rb1<tm1Tyj>,Rb1<tm1Tyj>,involves: 129S2/SvPas,MP:0001698,16449662,MGI:97874,Rb1
Rb1,Rb1<tm1Tyj>/Rb1<tm1Tyj>,Rb1<tm1Tyj>,involves: 129S2/SvPas,MP:0001092,16449662,MGI:97874,Rb1
Rb1,Rb1<tm1Tyj>/Rb1<tm1Tyj>,Rb1<tm1Tyj>,involves: 129S2/SvPas,MP:0000961,16449662,MGI:97874,Rb1


In [7]:
ddot.parse_obo('../data/MGI/mp.obo',
               '../data/MGI/parsed_mp.txt',
              '../data/MGI/id2name_mp.txt',
              '../data/MGI/id2namespace_mp.txt',
              '../data/MGI/altID_mp.txt')

In [8]:
MP2desc = pd.read_csv('../data/MGI/id2name_mp.txt',sep='\t',
                      names=['MP','description'],index_col='MP')

MP2desc=MP2desc.loc[MP2desc.index.dropna()] # drop NAN from index
print(len(MP2desc))


MP2desc.head()

14338


Unnamed: 0_level_0,description
MP,Unnamed: 1_level_1
MP:0000001,mammalian phenotype
MP:0000002,obsolete Morphology
MP:0000003,abnormal adipose tissue morphology
MP:0000011,abnormal adipose tissue morphology
MP:0000005,increased brown adipose tissue amount


In [9]:

hierarchy = pd.read_table('../data/MGI/parsed_mp.txt',
                          sep='\t',
                          header=None,
                          names=['Parent', 'Child', 'Relation', 'Namespace'])

hierarchy.head()

Unnamed: 0,Parent,Child,Relation,Namespace
0,MP:0005375,MP:0000003,is_a,MPheno.ontology
1,MP:0001778,MP:0000005,is_a,MPheno.ontology
2,MP:0001781,MP:0000008,is_a,MPheno.ontology
3,MP:0005334,MP:0000010,is_a,MPheno.ontology
4,MP:0000003,MP:0000013,is_a,MPheno.ontology


In [10]:
MPO = Ontology.from_table(
    table=hierarchy,
    parent='Parent',
    child='Child',
    add_root_name='MP:00SUPER',
    ignore_orphan_terms=True)
#MPO.clear_node_attr()
#MPO.clear_edge_attr()
MPO

0 genes, 13205 terms, 0 gene-term relations, 16647 term-term relations
node_attributes: []
edge_attributes: ['Relation', 'Namespace']

In [110]:
print(len(MPO.node_attr))

13205


In [22]:
# find terms related to brain
MP_focal_list = []
for t in MPO.node_attr.index.tolist():
    descr_temp = MPO.node_attr.loc[t]['description']
    if descr_temp.find('nervous')>-1:
        print(descr_temp)
        MP_focal_list.append(t)
    elif descr_temp.find('neuron')>-1:
        print(descr_temp)
        MP_focal_list.append(t)
    elif descr_temp.find('synapt')>-1:
        print(descr_temp)
        MP_focal_list.append(t)
        
print(len(MP_focal_list))

abnormal nervous system tract morphology
hippocampal neuron degeneration
abnormal motor neuron morphology
motor neuron degeneration
decreased motor neuron number
abnormal motor neuron innervation pattern
peripheral nervous system degeneration
abnormal sensory neuron morphology
decreased sensory neuron number
abnormal sensory neuron innervation pattern
abnormal hair guard neuron morphology
absent hair guard neurons
abnormal hair-tylotrich neuron morphology
abnormal hair-down neuron morphology
absent hair-down neurons
abnormal sympathetic neuron morphology
abnormal adrenergic neuron morphology
abnormal peptidergic neuron morphology
abnormal parasympathetic neuron morphology
abnormal cholinergic neuron morphology
abnormal enteric nervous system morphology
abnormal enteric neuron morphology
abnormal enteric cholinergic neuron morphology
absent enteric neurons
failure of neuromuscular synapse presynaptic differentiation
failure of neuromuscular synapse postsynaptic differentiation
impaired 

In [68]:
# test for enrichment in root node
OR_p_list,OR_CI_list,log_OR_list = [],[],[]

MP_keep_list = []

# root node is the largest node
root_node = ASD_clust_df['CD_MemberList_Size'].sort_values(ascending=False).head(1).index.tolist()[0]
for MP_focal in MP_focal_list:
    MP_desc_focal = dict(MP2desc['description'])[MP_focal]

    # focus the hierarchy on one branch, and look up all terms within that branch
    if len(MPO.parent_2_child[MP_focal])>0:
        MPO_focal = MPO.focus(MP_focal,verbose=False)
        focal_terms = MPO_focal.terms
    else: # if the term has no children, just look at that term
        focal_terms=[MP_focal]


    # check enrichment in root node
    focal_genes = ASD_clust_df['CD_MemberList'].loc[root_node].split(' ')
    

    mgi_temp = mgi_df[mgi_df['MP'].isin(focal_terms)]
    mgi_genes = list(np.unique(mgi_temp['gene_name']))
    mgi_genes = [g.upper() for g in mgi_genes]
    mgi_genes = list(np.intersect1d(mgi_genes,G_PC.nodes()))
    
    if len(mgi_genes)>10: # only test if there are at least 10 genes
        print('\n'+MP_desc_focal)
        print('number of genes in root node = '+str(len(focal_genes)))
        print('number of genes in focal MPO term = '+str(len(mgi_genes)))

        q00 = len(np.intersect1d(mgi_genes,focal_genes))
        print('number overlapping genes = '+str(q00))
        q01 = len(mgi_genes)-q00

        q10 = len(focal_genes)-q00
        q11 = len(G_PC.nodes())-q00-q01-q10

        table_temp = [[q00,q01],[q10,q11]]
        print(table_temp)

        CT= contingency_tables.Table2x2(table_temp)
        OR_p_temp = CT.log_oddsratio_pvalue()
        OR_CI_temp = CT.log_oddsratio_confint()
        log_OR_temp = CT.log_oddsratio
        print(OR_p_temp)
        print(OR_CI_temp)
        print(log_OR_temp)

        OR_p_list.append(OR_p_temp)
        OR_CI_list.append(OR_CI_temp)
        log_OR_list.append(log_OR_temp)
        
        MP_keep_list.append(MP_focal)

        print(hypergeom.sf(q00,len(G_PC.nodes()),len(focal_genes),len(mgi_genes)))
    
OR_CI_lower, OR_CI_upper = zip(*OR_CI_list)

root_KO_df = pd.DataFrame({'OR_p':OR_p_list,'OR_CI_lower':OR_CI_lower,'OR_CI_upper':OR_CI_upper},
                          index=MP_keep_list)


abnormal nervous system tract morphology
number of genes in root node = 292
number of genes in focal MPO term = 188
number overlapping genes = 15
[[15, 173], [277, 18355]]
2.335897013142731e-10
(1.2076770372746504, 2.289119755421405)
1.7483983963480276
4.122755360272459e-08

hippocampal neuron degeneration
number of genes in root node = 292
number of genes in focal MPO term = 25
number overlapping genes = 1
[[1, 24], [291, 18504]]
0.3405477911699869
(-1.029363442050749, 2.9780936548344226)
0.9743651063918368
0.05694876545560805

abnormal motor neuron morphology
number of genes in root node = 292
number of genes in focal MPO term = 166
number overlapping genes = 10
[[10, 156], [282, 18372]]
1.6354482238281865e-05
(0.7793266760396684, 2.079483444464491)
1.4294050602520798
5.8329668688649927e-05

motor neuron degeneration
number of genes in root node = 292
number of genes in focal MPO term = 42
number overlapping genes = 3
[[3, 39], [289, 18489]]
0.008125918692104933
(0.41351907905777385


decreased neuron apoptosis
number of genes in root node = 292
number of genes in focal MPO term = 70
number overlapping genes = 2
[[2, 68], [290, 18460]]
0.3836661701815005
(-0.7837931040950523, 2.038033225015376)
0.6271200604601619
0.09501981503733828

neuron degeneration
number of genes in root node = 292
number of genes in focal MPO term = 387
number overlapping genes = 16
[[16, 371], [276, 18157]]
7.082414538546484e-05
(0.5284282899205166, 1.5571661784512119)
1.0427972341858642
0.0001233673637615258

loss of hippocampal neurons
number of genes in root node = 292
number of genes in focal MPO term = 15
number overlapping genes = 0
[[0, 15], [292, 18513]]
0.6030145036163768
(-2.0717188299891993, 3.568273396106678)
0.7482772830587394
0.20914723347278139

loss of cortex neurons
number of genes in root node = 292
number of genes in focal MPO term = 17
number overlapping genes = 1
[[1, 16], [291, 18512]]
0.18126925286086726
(-0.6433378953410438, 3.403862815385411)
1.3802624600221836
0.02


increased sensory neuron number
number of genes in root node = 292
number of genes in focal MPO term = 52
number overlapping genes = 7
[[7, 45], [285, 18483]]
1.8188310568892242e-08
(1.5064848946321163, 3.11624540276593)
2.311365148699023
1.2700627284175902e-06

abnormal neuronal migration
number of genes in root node = 292
number of genes in focal MPO term = 127
number overlapping genes = 7
[[7, 120], [285, 18408]]
0.0007464658653372937
(0.5554449064697811, 2.097494808969857)
1.326469857719819
0.000830854165117607

abnormal retinal neuronal layer morphology
number of genes in root node = 292
number of genes in focal MPO term = 350
number overlapping genes = 11
[[11, 339], [281, 18189]]
0.017454251392781988
(0.13020010517321612, 1.354025476474817)
0.7421127908240166
0.008807473221156468

abnormal olfactory sensory neuron morphology
number of genes in root node = 292
number of genes in focal MPO term = 24
number overlapping genes = 2
[[2, 22], [290, 18506]]
0.01765201029461214
(0.30590


decreased neuronal precursor proliferation
number of genes in root node = 292
number of genes in focal MPO term = 11
number overlapping genes = 0
[[0, 11], [292, 18517]]
0.46446474434102136
(-1.7778115670155863, 3.895108071838024)
1.0586482524112188
0.1580658166929844

abnormal postsynaptic density morphology
number of genes in root node = 292
number of genes in focal MPO term = 16
number overlapping genes = 0
[[0, 16], [292, 18512]]
0.6343194783497113
(-2.1334719801344324, 3.500841468865331)
0.6836847443654492
0.2214274253135972


In [74]:
root_KO_df['MP_description']=root_KO_df.index.map(dict(MP2desc['description']))

root_KO_df=root_KO_df.sort_values('OR_p')
root_KO_df.head(10)

Unnamed: 0,OR_p,OR_CI_lower,OR_CI_upper,MP_description
MP:0002272,1.3760450000000002e-28,1.752705,2.505077,abnormal nervous system electrophysiology
MP:0003631,7.252507e-28,1.069838,1.536731,nervous system phenotype
MP:0003633,4.243104e-27,1.131384,1.634166,abnormal nervous system physiology
MP:0003632,8.548397e-26,1.044977,1.524572,abnormal nervous system morphology
MP:0002882,3.1939559999999998e-21,1.053431,1.604292,abnormal neuron morphology
MP:0002206,4.06421e-20,1.203781,1.856768,abnormal CNS synaptic transmission
MP:0003635,1.031294e-19,1.125281,1.744317,abnormal synaptic transmission
MP:0004811,2.17403e-17,1.181332,1.891119,abnormal neuron physiology
MP:0003861,4.567624e-17,0.963277,1.549849,abnormal nervous system development
MP:0002752,2.769039e-15,0.966874,1.604834,abnormal somatic nervous system morphology


# Test for enrichment in all systems

In [122]:
MP_focal_top = root_KO_df.head(10).index.tolist()

MP_full_results_df=pd.DataFrame(index=ASD_clust_df.index.tolist())

for MP_focal in MP_focal_top:
    MP_desc_focal = dict(MP2desc['description'])[MP_focal]
    print(MP_desc_focal)

    # focus the hierarchy on one branch, and look up all terms within that branch
    if len(MPO.parent_2_child[MP_focal])>0:
        MPO_focal = MPO.focus(MP_focal)
        focal_terms = MPO_focal.terms
    else: # if the term has no children, just look at that term
        focal_terms=MP_focal


    hyper_p_list = []
    num_genes_list = []
    genes_id_list = []

    OR_p_list,OR_CI_list,log_OR_list=[],[],[]
    for focal_cluster in ASD_clust_df.index.tolist():
        #print(focal_cluster)
        mFocal_genes = ASD_clust_df['CD_MemberList'].loc[focal_cluster].split(' ')


        from scipy.stats import hypergeom
        M=len(list(G_PC.nodes())) # only keep genes in PCnet
        # Look up all entries matching focal_terms, and mFocal_genes
        mgi_temp = mgi_df[mgi_df['MP'].isin(focal_terms)]
        new_index=[g.upper() for g in mgi_temp.index.tolist()]
        mgi_temp.index=new_index

        N=len(np.intersect1d(list(np.unique(mgi_temp.index.tolist())),list(G_PC.nodes()))) # only keep genes in PCnet

        if len(np.intersect1d(mFocal_genes,mgi_temp.index.tolist()))>0:
            mgi_genes = mgi_temp.index.tolist()
            mgi_genes = list(np.intersect1d(mgi_genes,list(G_PC.nodes())))
            x = len(np.intersect1d(mFocal_genes,mgi_genes))
            #print(x)
            n=len(mFocal_genes)

            #mgi_temp['MP_description']=mgi_temp['MP'].map(dict(MP2desc['description']))



            #print(hypergeom.sf(x,M,n,N))
            hyper_p_list.append(hypergeom.sf(x,M,n,N))
            num_genes_list.append(x)
            genes_id_list.append(' '.join(list(np.intersect1d(mFocal_genes,mgi_genes))))

            q00 = len(np.intersect1d(mgi_genes,mFocal_genes))
            q01 = len(mgi_genes)-q00

            q10 = len(mFocal_genes)-q00
            q11 = len(list(G_PC.nodes()))-q00-q01-q10

            table_temp = [[q00,q01],[q10,q11]]

            CT= contingency_tables.Table2x2(table_temp)
            OR_p_temp = CT.log_oddsratio_pvalue()
            OR_CI_temp = CT.log_oddsratio_confint()
            log_OR_temp = CT.log_oddsratio
            #print(OR_p_temp)


            OR_p_list.append(OR_p_temp)
            OR_CI_list.append(OR_CI_temp)
            log_OR_list.append(log_OR_temp)


        else:
            hyper_p_list.append(1)
            num_genes_list.append(0)
            genes_id_list.append('')

            OR_p_list.append(1)
            OR_CI_list.append(0)
            log_OR_list.append(0)



    MP_focal_df = pd.DataFrame({MP_desc_focal+':-log(hyper_p)':-np.log10(hyper_p_list),
                                MP_desc_focal+':-log(OR_p)':-np.log10(OR_p_list),
                                MP_desc_focal+':log_OR':log_OR_list,
                                MP_desc_focal+':num_genes':num_genes_list,
                                MP_desc_focal+':gene_ids':genes_id_list},index=ASD_clust_df.index.tolist())

    if MP_desc_focal+':-log(hyper_p)' not in MP_full_results_df.columns.tolist():
        MP_full_results_df=MP_full_results_df.join(MP_focal_df)


abnormal nervous system electrophysiology
Genes and Terms to keep: 34
nervous system phenotype
Genes and Terms to keep: 2042




abnormal nervous system physiology
Genes and Terms to keep: 354
abnormal nervous system morphology
Genes and Terms to keep: 1687
abnormal neuron morphology
Genes and Terms to keep: 262
abnormal CNS synaptic transmission
Genes and Terms to keep: 43
abnormal synaptic transmission
Genes and Terms to keep: 132
abnormal neuron physiology
Genes and Terms to keep: 46
abnormal nervous system development
Genes and Terms to keep: 191
abnormal somatic nervous system morphology
Genes and Terms to keep: 356


In [123]:
MP_full_results_df.head()

Unnamed: 0,abnormal nervous system electrophysiology:-log(hyper_p),abnormal nervous system electrophysiology:-log(OR_p),abnormal nervous system electrophysiology:log_OR,abnormal nervous system electrophysiology:num_genes,abnormal nervous system electrophysiology:gene_ids,nervous system phenotype:-log(hyper_p),nervous system phenotype:-log(OR_p),nervous system phenotype:log_OR,nervous system phenotype:num_genes,nervous system phenotype:gene_ids,...,abnormal nervous system development:-log(hyper_p),abnormal nervous system development:-log(OR_p),abnormal nervous system development:log_OR,abnormal nervous system development:num_genes,abnormal nervous system development:gene_ids,abnormal somatic nervous system morphology:-log(hyper_p),abnormal somatic nervous system morphology:-log(OR_p),abnormal somatic nervous system morphology:log_OR,abnormal somatic nervous system morphology:num_genes,abnormal somatic nervous system morphology:gene_ids
C435,-0.0,-0.0,0.0,0,,2.910936,1.581589,2.567393,3,PSD2 SLC17A6 STXBP5L,...,-0.0,-0.0,0.0,0,,-0.0,-0.0,0.0,0,
C434,4.715884,4.303365,4.063367,2,EN1 NKX6-2,2.910936,1.581589,2.567393,3,EN1 GSX2 NKX6-2,...,4.672897,2.89222,3.719366,3,EN1 GSX2 NKX6-2,5.136098,3.276389,4.003268,3,EN1 GSX2 NKX6-2
C433,-0.0,-0.0,0.0,0,,0.791776,0.125505,0.36947,1,GNAZ,...,1.597754,0.725707,1.520461,1,GNAZ,-0.0,-0.0,0.0,0,
C432,-0.0,-0.0,0.0,0,,1.646664,0.847538,1.468432,2,CDK5R2 KIF1A,...,1.597754,0.725707,1.520461,1,CDK5R2,-0.0,-0.0,0.0,0,
C431,-0.0,-0.0,0.0,0,,0.791776,0.125505,0.36947,1,KIF1A,...,-0.0,-0.0,0.0,0,,-0.0,-0.0,0.0,0,


# Update hierarchy on ndex

In [124]:
# add new fields as node attributes to hierarchy
MP_full_results_dict = MP_full_results_df.to_dict()


In [114]:
for key in MP_full_results_dict.keys():
    print(key)
    nx.set_node_attributes(G_ASD,key,MP_full_results_dict[key])

CD_MemberList
CD_AnnotatedMembers_Size
CD_AnnotatedMembers_Pvalue
CD_AnnotatedMembers
CD_CommunityName
CD_AnnotatedMembers_Overlap
HiDeF_persistence
CD_MemberList_LogSize
CD_Labeled
CD_AnnotatedAlgorithm
CD_NonAnnotatedMembers
CD_AnnotatedMembers_SourceDB
CD_MemberList_Size
name
abnormal nervous system electrophysiology:-log(hyper_p)
abnormal nervous system electrophysiology:-log(OR_p)
abnormal nervous system electrophysiology:log_OR
abnormal nervous system electrophysiology:num_genes
abnormal nervous system electrophysiology:gene_ids
nervous system phenotype:-log(hyper_p)
nervous system phenotype:-log(OR_p)
nervous system phenotype:log_OR
nervous system phenotype:num_genes
nervous system phenotype:gene_ids
abnormal nervous system physiology:-log(hyper_p)
abnormal nervous system physiology:-log(OR_p)
abnormal nervous system physiology:log_OR
abnormal nervous system physiology:num_genes
abnormal nervous system physiology:gene_ids
abnormal nervous system morphology:-log(hyper_p)
abnormal

In [115]:
pd.DataFrame.from_dict(dict(G_ASD.nodes(data=True)), orient='index').head()

Unnamed: 0,CD_MemberList,CD_AnnotatedMembers_Size,CD_AnnotatedMembers_Pvalue,CD_AnnotatedMembers,CD_CommunityName,CD_AnnotatedMembers_Overlap,HiDeF_persistence,CD_MemberList_LogSize,CD_Labeled,CD_AnnotatedAlgorithm,...,abnormal nervous system development:-log(hyper_p),abnormal nervous system development:-log(OR_p),abnormal nervous system development:log_OR,abnormal nervous system development:num_genes,abnormal nervous system development:gene_ids,abnormal somatic nervous system morphology:-log(hyper_p),abnormal somatic nervous system morphology:-log(OR_p),abnormal somatic nervous system morphology:log_OR,abnormal somatic nervous system morphology:num_genes,abnormal somatic nervous system morphology:gene_ids
C435,NSG2 PSD2 SLC17A6 STXBP5L,1,0.007995642253764,SLC17A6,Organic anion transporters Homo sapiens R-HSA-...,0.25,5,2.0,True,Annotated by Enrichr [Docker: coleslawndex/cde...,...,-0.0,-0.0,0.0,0,,-0.0,-0.0,0.0,0,
C367,ABCC8 ADARB2 BRINP3 C14orf132 CADM2 CALB1 CALB...,18,2.977322934006789e-14,GABRA2 UNC13B GABRB3 UNC13C GABRB2 CHRNA7 GAD1...,chemical synaptic transmission (GO:0007268),0.228,17,6.304,True,Annotated by Enrichr [Docker: coleslawndex/cde...,...,4.744992,5.097764,1.256907,16,CDK5R2 CHRNA7 CNTN1 GFAP GNAZ LGI1 MAPT MPZ NC...,5.597954,6.389371,1.462644,15,CHRNA7 GABRB2 GABRB3 GRM7 KIF5C MAPT MOG MPZ N...
C434,EN1 GSX2 IRX3 NKX6-2,1,0.0249546954540196,NKX6-2,regulation of myelination (GO:0031641),0.25,5,2.0,True,Annotated by Enrichr [Docker: coleslawndex/cde...,...,4.672897,2.89222,3.719366,3,EN1 GSX2 NKX6-2,5.136098,3.276389,4.003268,3,EN1 GSX2 NKX6-2
C372,EN1 GSX2 HOXA4 HOXA7 IRX1 IRX3 IRX6 MEIS3P1 MK...,3,0.0001677832487479,PKNOX1 PBX1 HOXA4,Activation of HOX genes during differentiation...,0.2,16,3.907,True,Annotated by Enrichr [Docker: coleslawndex/cde...,...,3.544148,3.35893,1.928662,5,EN1 GSX2 IRX1 MNX1 NKX6-2,4.183997,4.259458,2.213053,5,EN1 GSX2 IRX6 MNX1 NKX6-2
C433,CALM3 FSD1 GNAZ SRC,2,0.0034578149777732,SRC CALM3,GnRH signaling pathway,0.5,5,2.0,True,Annotated by Enrichr [Docker: coleslawndex/cde...,...,1.597754,0.725707,1.520461,1,GNAZ,-0.0,-0.0,0.0,0,


In [116]:
G_ASD_cx = ndex2.create_nice_cx_from_networkx(G_ASD)

In [119]:
# update the network on ndex
ndex_password=getpass.getpass('NDEx password: ') # enter password again (not sure if necessary)
G_ASD_cx.update_to(ASD_uuid,ndex_server,ndex_user,ndex_password)

NDEx password: ········
Generating CX


''

In [120]:
ASD_uuid


'0ecb653c-9405-11eb-9e72-0ac135e8bacf'