# Create a Protein Disease graph from the DB adapter 'OlegDB'

In [None]:
from ProteinGraphML.DataAdapter import OlegDB
from ProteinGraphML.GraphTools import ProteinDiseaseAssociationGraph

## We construct a base map of proteins to diseases simply by creating the ProteinDiseaseAssociationGraph

In [None]:
# Instantiate the database adapter; the later cells call its load* methods.
dbAdapter = OlegDB()

# Build the graph object from the adapter. The underlying graph is exposed
# as `proteinGraph.graph` and is used directly in later cells.
proteinGraph = ProteinDiseaseAssociationGraph(dbAdapter)

## The 'ProteinDiseaseAssociationGraph' object has helper methods, but we can also directly access the networkx graph it was created with:

In [None]:
# Number of nodes currently in the underlying networkx graph.
proteinGraph.graph.number_of_nodes()

## We will want to filter by the proteins we are interested in. This list comes from a DB adapter, but any set of protein IDs will do.

In [None]:

# Fetch the full protein table from the adapter and keep its ID column.
proteinTable = dbAdapter.loadTotalProteinList()
proteins = proteinTable.protein_id

# De-duplicate into a set of protein IDs; this is the filter passed to the
# adapter's load* methods below.
filterByProteins = set(proteins)

Using `.attach` adds edges from a DB, as defined by the adapter.
With this method we can build up a graph of data that can itself be saved, which avoids the need to
rebuild it as we work on different diseases or perform analysis.

We also filter by the proteins we care about; in this case, that is our original list.

___NOTE THIS CELL TAKES A FEW MINUTES___

In [None]:
# Attach each edge source in turn, restricted to the proteins of interest.
# The order matches the original cell: PPI, KEGG, Reactome, InterPro, GO.
edgeLoaders = (
    dbAdapter.loadPPI,
    dbAdapter.loadKegg,
    dbAdapter.loadReactome,
    dbAdapter.loadInterpro,
    dbAdapter.loadGo,
)
for loadEdges in edgeLoaders:
    proteinGraph.attach(loadEdges(filterByProteins))

networkx provides an API we can use to retrieve nodes from the graph.

* Here I use the distinctive ID prefix of each node type to count them
* We can get a count of each kind of node in the current graph

In [None]:
# Count the nodes contributed by each annotation source, identified by the
# prefix of the node ID. Only string node IDs are considered; any other
# node type is skipped by the isinstance check.
#
# The node view is filtered once instead of being copied into a new list
# for every source, and str.startswith replaces the slice comparisons.
stringNodes = [node for node in proteinGraph.graph.nodes if isinstance(node, str)]

keggNodes = [node for node in stringNodes if node.startswith("hsa")]
reactome = [node for node in stringNodes if node.startswith("R-")]
goNodes = [node for node in stringNodes if node.startswith("GO:")]
interNodes = [node for node in stringNodes if node.startswith("IPR")]

print("KEGG",len(keggNodes))
print("REACT",len(reactome))
print("GO",len(goNodes))
print("INTERP",len(interNodes))

In [None]:
# Persist the assembled graph under the given name so that it does not have
# to be rebuilt for later work.
graphName = "CURRENT_GRAPH"
proteinGraph.save(graphName)