In [2]:
from ProteinGraphML.DataAdapter import OlegDB
from ProteinGraphML.GraphTools import ProteinDiseaseAssociationGraph

# We will create a protein-disease graph using the DB adapter 'OlegDB'.

In [4]:
# Instantiate the DB adapter; it is the data source handed to the graph below.
dbAdapter = OlegDB()

# Simply constructing the ProteinDiseaseAssociationGraph from the adapter gives us
# the base protein-to-disease map.
proteinGraph = ProteinDiseaseAssociationGraph(dbAdapter)


In [6]:
# The 'ProteinDiseaseAssociationGraph' object has helper methods, but the networkx
# graph it wraps is also directly accessible via `.graph`; here we ask that graph
# how many nodes it currently holds.
proteinGraph.graph.number_of_nodes()


5213

In [4]:
# We want to restrict the graph to the proteins we are interested in. Here the IDs
# come from the DB adapter, but any set of protein IDs will do.
# NOTE(review): `protein_id` is presumably a column/attribute of the object returned
# by loadTotalProteinList() -- confirm against the adapter.

proteins = dbAdapter.loadTotalProteinList().protein_id
filterByProteins = set(proteins) # NOTE: this is just a set of protein IDs (duplicates collapse)

In [5]:
# .attach() adds edges from a DB, as defined by the adapter. Building the graph this
# way gives us a graph of data that can itself be saved, which avoids having to
# rebuild it each time we work on a different disease or perform another analysis.
#
# NOTE: THIS CELL TAKES A FEW MINUTES
#
# Every loader receives the same protein filter, so only the proteins we care about
# (in this case our original list) are loaded.
edgeLoaders = (
    dbAdapter.loadPPI,
    dbAdapter.loadKegg,
    dbAdapter.loadReactome,
    dbAdapter.loadInterpro,
    dbAdapter.loadGo,
)
for loadEdges in edgeLoaders:
    proteinGraph.attach(loadEdges(filterByProteins))




In [7]:
# networkx provides an API we can read the graph's nodes from.
# Each group below is picked out by a distinctive ID prefix; the isinstance check
# skips every node that is not a string.
graphNodes = proteinGraph.graph.nodes

# compute node lists
keggNodes = [node for node in graphNodes if isinstance(node, str) and node.startswith("hsa")]
reactome = [node for node in graphNodes if isinstance(node, str) and node.startswith("R-")]
goNodes = [node for node in graphNodes if isinstance(node, str) and node.startswith("GO:")]
interNodes = [node for node in graphNodes if isinstance(node, str) and node.startswith("IPR")]

# report how many nodes of each kind the current graph holds
for label, bucket in (("KEGG", keggNodes), ("REACT", reactome), ("GO", goNodes), ("INTERP", interNodes)):
    print(label, len(bucket))

KEGG 320
REACT 2130
GO 17531
INTERP 14244


In [8]:
# Save the graph built above under the name "CURRENT_GRAPH" so it can be reused
# without re-running the slow attach step.
# NOTE(review): the on-disk location/format is decided by
# ProteinDiseaseAssociationGraph.save -- confirm there.
proteinGraph.save("CURRENT_GRAPH")