In [1]:
from ProteinGraphML.DataAdapter import OlegDB
from ProteinGraphML.GraphTools import ProteinDiseaseAssociationGraph

# we will create a Protein Disease graph from the DB adapter 'OlegDB'

In [2]:
# Create the database adapter; it is used below to load the protein list and edge data.
# NOTE(review): the recorded run of this cell failed with an OperationalError about an
# unresolvable host name, so the adapter appears to need a reachable DB host — confirm
# the connection configuration before running.
dbAdapter = OlegDB()

# Creating the ProteinDiseaseAssociationGraph is all that is needed to construct the
# base protein-to-disease map from the adapter.
proteinGraph = ProteinDiseaseAssociationGraph(dbAdapter)

OperationalError: could not translate host name "hostit" to address: Name or service not known


In [12]:
# The ProteinDiseaseAssociationGraph object has helper methods, but the underlying
# networkx graph can also be accessed directly through its `.graph` attribute.
# Here we display its nodes. NOTE: this shows every node, so the output is very long.
proteinGraph.graph.nodes


NodeView(('MP_0000062', 3256, 'MP_0000124', 'MP_0000137', 'MP_0000149', 'MP_0000150', 'MP_0000154', 'MP_0000194', 'MP_0000367', 'MP_0000432', 'MP_0000438', 'MP_0000443', 'MP_0000452', 'MP_0000455', 'MP_0000458', 'MP_0000550', 'MP_0000552', 'MP_0000556', 'MP_0000558', 'MP_0000559', 'MP_0000562', 'MP_0000564', 'MP_0000572', 'MP_0000579', 'MP_0000588', 'MP_0000745', 'MP_0001258', 'MP_0001284', 'MP_0001289', 'MP_0001293', 'MP_0001303', 'MP_0001304', 'MP_0001307', 'MP_0001312', 'MP_0001314', 'MP_0001319', 'MP_0001322', 'MP_0001325', 'MP_0001340', 'MP_0001399', 'MP_0001402', 'MP_0001406', 'MP_0001410', 'MP_0001486', 'MP_0004599', 'MP_0001489', 'MP_0001510', 'MP_0001512', 'MP_0001513', 'MP_0001523', 'MP_0001529', 'MP_0001539', 'MP_0001556', 'MP_0001566', 'MP_0001925', 'MP_0001926', 'MP_0002060', 'MP_0002067', 'MP_0002075', 'MP_0002098', 'MP_0002100', 'MP_0002102', 'MP_0002110', 'MP_0002111', 'MP_0002187', 'MP_0002544', 'MP_0002546', 'MP_0002626', 'MP_0002757', 'MP_0002758', 'MP_0002764', 'MP_

In [4]:
# We want to restrict the graph to the proteins we are interested in. Here the IDs
# come from the DB adapter, but any set of protein IDs will do.

proteins = dbAdapter.loadTotalProteinList().protein_id
filterByProteins = set(proteins)  # NOTE: this is just a set of protein IDs (duplicates collapse)

In [5]:
# `.attach` adds the edges that the adapter loads from the DB to the graph. Building
# the graph this way gives us a single graph of data that can itself be saved, so it
# does not need to be rebuilt while we work on different diseases or perform analysis.
#
# NOTE: THIS CELL TAKES A FEW MINUTES
#
# Every loader is given `filterByProteins`, so we also filter by the proteins we care
# about; in this case that is our original list.
edgeLoaders = (
    dbAdapter.loadPPI,
    dbAdapter.loadKegg,
    dbAdapter.loadReactome,
    dbAdapter.loadInterpro,
    dbAdapter.loadGo,
)

# Load and attach one source at a time, in the same order as listed above.
for loadEdges in edgeLoaders:
    proteinGraph.attach(loadEdges(filterByProteins))




In [7]:
# networkx provides an API for reading the nodes of the graph. Each annotation source
# uses a distinctive string prefix for its node IDs, so we can count the nodes of each
# source with a prefix match. Non-string nodes (e.g. integer nodes such as 3256) are
# skipped by the isinstance check.

def _nodesWithPrefix(nodes, prefix):
    """Return the string nodes in `nodes` whose ID starts with `prefix`.

    nodes  -- iterable of graph nodes (may mix strings and non-strings)
    prefix -- string prefix to match, e.g. "GO:"
    """
    return [node for node in nodes if isinstance(node, str) and node.startswith(prefix)]


# Snapshot the node view once instead of copying it again for every source.
allNodes = list(proteinGraph.graph.nodes)

# compute node lists
keggNodes = _nodesWithPrefix(allNodes, "hsa")
reactome = _nodesWithPrefix(allNodes, "R-")
goNodes = _nodesWithPrefix(allNodes, "GO:")
interNodes = _nodesWithPrefix(allNodes, "IPR")

# we can get a count of the nodes in the current graph
print("KEGG",len(keggNodes))
print("REACT",len(reactome))
print("GO",len(goNodes))
print("INTERP",len(interNodes))

KEGG 320
REACT 2130
GO 17531
INTERP 14244


In [8]:
# Save the assembled graph under the name "CURRENT_GRAPH" so it does not have to be
# rebuilt for later work.
# NOTE(review): the on-disk location and format are decided by `save` — confirm in GraphTools.
proteinGraph.save("CURRENT_GRAPH")