# Generating Graph-based hierarchies via eigenvector decomposition

This script extracts more information from the fact that the adjacency matrix, which contains the weighted edges between nodes, can be diagonalised to yield a set of eigenvectors ($v_\lambda$) and eigenvalues ($\lambda$). This approach is known as **spectral graph theory**: https://en.wikipedia.org/wiki/Spectral_graph_theory

The first eigenvalue $\lambda_1$ extracts the eigenvector centrality ($v_{\lambda1}$), which also guarantees that all elements of $v_{\lambda1}$ are >=0. This is convenient for many node-weighted analyses that don't play well with negative weights. Like most authors, we interpret this as the influence graph - however, it is important to note that it is only the influence of the **largest cohesive domain within the protein**. You will often find that for multidomain protein graphs, the centrality identifies only one domain, with all others almost zero.

This is because spectral decomposition actually partitions the graph into functioning subunits. The negative values of subsequent $v_\lambda$ tell us 
something important about the intra-graph dynamics. Like the harmonics of a vibrating string, discrete domains will tend to anti-correlate so as to preserve the linear independence of the spectral decomposition.

Thus, the relative values between domains tell us something important about the cooperativity between domains - graph-wise, their level of interconnectedness. This scales from zero for completely independent subunits to some as yet unknown maximum for a single-domain protein.

*Note: This is the MD-equivalent of Kundu et al. (2006)'s work on using Gaussian Network Models to decompose proteins into discrete domains.*

*Also c.f. Mishra et al. (2018), who compare the correlation networks generated from MD with those from GNM, and find that they largely agree.*

In [None]:
# Load the python package
import os
from dynetan.toolkit import *
from dynetan.viz import *
from dynetan.proctraj import *
from dynetan.gencor import *# Load the python package
from dynetan.contact import *
from dynetan.datastorage import *
from collections import Counter
from itertools import combinations
import community

from sklearn.cluster import SpectralClustering
import numpy as np
import pandas as pd

In [None]:
# For visualization
from bokeh.io import output_file, output_notebook, push_notebook, show
from bokeh import models as bokehModels
from bokeh import transform as bokehTransform
from bokeh import layouts as bokehLayouts
from bokeh import plotting as bokehPlotting
from bokeh import palettes as bokehPalettes
from bokeh import events as bokehEvents
# For pre-calculating Cartesian distances based on 2D embedding
from sklearn.manifold import MDS

In [None]:
# Lookup table from residue names to the short labels used when building node names.
# Keys are three-letter amino-acid codes plus a handful of other residue names
# (e.g. MG, ATP, POPC, SOL); a residue name missing from this table raises KeyError
# in the naming helpers that index it.
mapResidueNames={'ALA':'A','CYS':'C','ASP':'D','GLU':'E','PHE':'F',
                 'GLY':'G','HIS':'H','HSD':'H','HSE':'H','ILE':'I','LYS':'K','LEU':'L',
                 'MET':'M','ASN':'N','PRO':'P','GLN':'Q','ARG':'R',
                 'SER':'S','THR':'T','VAL':'V','TRP':'W','TYR':'Y',
                 'MG':'Mg','POPC':'Popc',
                 'ATP':'Atp','GTP':'Gtp',
                 'NA':'Sod','SOD':'Sod','CLA':'Cl','CL':'Cl','POT':'Pot','K':'Pot',
                 'SOL':'h2o','HOH':'h2o','WAT':'h2o','TIP':'h2o','H2O':'h2o',
                }

def name_node(dnad, node):
    """
    Return a short label for one node: the mapped residue name followed by the residue ID.
    Raises KeyError if the residue name has no entry in mapResidueNames.
    """
    atom = dnad.nodesAtmSel[node]
    resname, resid = atom.resname, atom.resid
    return "%s%s" % (mapResidueNames[resname], resid)

def name_nodes_with_clarification( G ):
    """
    Make a basic automated effort to get a short name that uniquely identifies the node within the graph.

    Each node of G must carry the attributes 'resName', 'resID', 'segID' and 'atomName'.
    The tentative name is the mapped residue name followed by the residue ID. Nodes that share
    a tentative name get the segID as a prefix if their segIDs differ, and the atom name as a
    suffix if their atom names differ.

    Returns an OrderedDict mapping node -> name, in the iteration order of G.nodes().
    Raises KeyError if a residue name has no entry in mapResidueNames.
    """
    # Imported locally: this file only imports Counter from collections at the top level.
    from collections import OrderedDict

    # = = = Make initial tentative assignment based on resname and resid.
    dictNodeNames = OrderedDict()
    dictNodeLookup = {}
    for x in G.nodes():
        tempName = "%s%s" % ( mapResidueNames[G.nodes[x]['resName']], G.nodes[x]['resID'] )
        dictNodeNames[x] = tempName
        dictNodeLookup.setdefault(tempName, []).append(x)

    # = = = Go through again and disambiguate every tentative name shared by more than one node.
    # The groups are disjoint, so the order in which they are processed does not matter.
    for members in dictNodeLookup.values():
        if len(members) < 2:
            continue
        # = = = First check if segIDs are different. If so, add as prefix.
        if len( { G.nodes[x]['segID'] for x in members } ) > 1:
            for x in members:
                dictNodeNames[x] = G.nodes[x]['segID']+"_"+dictNodeNames[x]

        # = = = Also check if atom names are different. If so, add as suffix.
        if len( { G.nodes[x]['atomName'] for x in members } ) > 1:
            for x in members:
                dictNodeNames[x] = dictNodeNames[x]+"_"+G.nodes[x]['atomName']

    return dictNodeNames

def name_graph_nodes_from_MDAnalysis(G, nodesAtomSel):
    """
    Annotate every node of G, in place, with atom metadata and a short display name.

    For each node x, nodesAtomSel[x] supplies the 'segID', 'resName', 'resID' and 'atomName'
    attributes. The 'name' attribute is then set from name_nodes_with_clarification().
    Returns None.
    """
    print("...naming the nodes of the graph based on MDAnalysis.")
    for x in G.nodes():
        atom = nodesAtomSel[x]
        G.nodes[x]['segID']    = atom.segid
        G.nodes[x]['resName']  = atom.resname
        G.nodes[x]['resID']    = atom.resid
        G.nodes[x]['atomName'] = atom.name

    # = = = Add atoms where there is multi-node residues.
    nodeNames = name_nodes_with_clarification( G )
    nx.set_node_attributes(G, nodeNames, "name")

In [None]:
def encode_community_into_graph(G, nameAttr='commLouvain'):
    """
    Store a Louvain community assignment on every node of G under the attribute nameAttr.

    The same Louvain heuristics as DNA's work, but lifted out of the code-base.
    community.best_partition() returns a dictionary of community assignments, which is passed
    straight to networkx's set_node_attributes. The assignment is not sorted by any particular metric.
    """
    nx.set_node_attributes(G, community.best_partition(G), nameAttr)

In [None]:
def extract_spectral_clustering(m, nMax, n_init=10):
    """
    Run spectral clustering on the precomputed affinity matrix m for 2..nMax clusters.

    https://scikit-learn.org/stable/modules/generated/sklearn.cluster.SpectralClustering.html

    Returns an integer array of shape (m.shape[0], nMax). Column i-1 holds the labels obtained
    with i clusters; column 0 is left as zeros (every node in one cluster).
    """
    out = np.zeros((m.shape[0],nMax), dtype=int)
    c = SpectralClustering(n_clusters=1, affinity='precomputed', n_init=n_init)
    for i in range(2,nMax+1):
        c.set_params(n_clusters=i)
        # Cluster the matrix that was passed in, not the notebook-level adjMat global.
        out[:,i-1] = c.fit_predict(m)
    return out

def convert_clusters_to_nodelist(l):
    """
    Turn a sequence of cluster labels (one per node) into a list of node-index lists.

    Labels are used directly as list positions, so they are expected to be the
    integers 0..k-1 where k is the number of distinct labels.
    """
    groups = [ [] for _ in range(len(np.unique(l))) ]
    for node, label in enumerate(l):
        groups[label].append(node)
    return groups

def convert_dict_to_nodelist(d):
    """
    Turn a {node: label} dictionary into a list of node lists, one list per label.

    Labels are used directly as list positions, so they are expected to be the
    integers 0..k-1 where k is the number of distinct labels.
    """
    nLabels = len( np.unique( np.asarray( list(d.values()), dtype=int ) ) )
    groups = [ [] for _ in range(nLabels) ]
    for node, label in d.items():
        groups[label].append(node)
    return groups

In [None]:
def compute_eigenvector_overlap_inner(v1,v2):
    """
    Overlap of two vectors: the Euclidean norm of the element-wise minimum of their absolute values.
    """
    # Alternative overlap definitions that were tried, kept for reference:
    #return np.dot(np.fabs(v[:,i]),np.fabs(v[:,j]))
    #return np.dot(np.square(v1),np.square(v2))
    #return np.dot(np.fabs(v1),np.fabs(v2))
    #return np.dot(v1,v2)
    sharedMagnitude = np.minimum( np.fabs(v1), np.fabs(v2) )
    return np.linalg.norm( sharedMagnitude )

def compute_eigenvector_overlap(v, nVec=2):
    """
    Build the symmetric (nVec, nVec) matrix of pairwise overlaps between the first nVec columns of v.

    The diagonal is fixed at 1; each off-diagonal entry comes from compute_eigenvector_overlap_inner().
    """
    overlaps = np.ones((nVec,nVec))
    for col in range(1, nVec):
        for row in range(col):
            value = compute_eigenvector_overlap_inner(v[:,row], v[:,col])
            overlaps[row,col] = value
            overlaps[col,row] = value
    return overlaps

In [None]:
def compute_excess_volume(v, nVec=None):
    """
    For each of the first nVec columns of v, return sum(column) / sum(|column|).

    The value is 1 for an all-positive column, -1 for an all-negative column and 0 when
    positive and negative entries cancel exactly.

    v    : 2D array whose columns are the vectors to analyse.
    nVec : number of leading columns to process; defaults to every column of v.

    Returns a 1D float array of length nVec. An all-zero column yields nan (0/0).
    """
    if nVec is None:
        # Count columns, not rows: the loop below indexes v[:,i].
        nVec = v.shape[1]
    outArr = np.zeros((nVec))
    for i in range(nVec):
        column = v[:,i]
        outArr[i] = np.sum(column)/np.sum(np.fabs(column))
    return outArr

In [None]:
def convert_booleans_to_binary(arrBools):
    """
    Weight a 1D sequence of booleans by descending powers of two (first element most significant).

    Returns the element-wise products, not their sum: [True, False, True] gives [4, 0, 1].
    """
    # The original referenced an undefined name (listBools) here and always raised NameError.
    return np.multiply(arrBools,np.power(2,np.arange(len(arrBools)-1,-1,-1)))

def extract_spectral_heirarchy(v, nVec=2, bCompress=False):
    """
    Encode the sign pattern of the first nVec eigenvectors as integer group labels per node.

    Assumes right eigenvectors for now with v[:,i] corresponding to eigenvalue i.

    Column i of the result (i >= 1) reads the flags "component < 0" of eigenvectors 0..i as a
    binary number, with eigenvector 0 as the most significant bit, scaled by 2**(nVec-i-1).
    Column 0 is left as zeros.

    v         : 2D array with at least nVec columns.
    nVec      : number of leading eigenvectors to use.
    bCompress : if True, relabel the values across the whole array as consecutive integers
                0..k-1, preserving their order.

    Returns an integer array of shape (v.shape[0], nVec).
    """
    isNegative = v[:,:nVec] < 0
    outArr  = np.zeros( (v.shape[0],nVec), dtype='int' )
    for i in range(1,nVec):
        # = = = Rank the highest eigenvector as the smallest contributor to the binary heirarchical decomposition
        powers = np.power(2,np.arange(i,-1,-1))*2**(nVec-i-1)
        outArr[:,i] = np.sum( np.multiply( isNegative[:,:i+1], powers ), axis=1)

    if bCompress:
        # The inverse indices of np.unique are the ranks of each value among the sorted unique values.
        _, inverse = np.unique(outArr, return_inverse=True)
        outArr[...] = np.reshape(inverse, outArr.shape)
    return outArr

In [None]:
def build_structure_dataframe(atomSel, key, style='VMD'):
    """
    Returns a 4-column pandas data frame containing the key, segid, resid, and name for use in exporting.
    This makes selecting the atoms in a visualiser much easier.

    atomSel : object exposing equal-length .segids, .resids and .names sequences.
    key     : sequence of per-atom keys, same length as the selection.
    style   : 'VMD' names the segment column 'segname'; 'MDA' names it 'segid'.

    Raises ValueError for any other style.
    """
    if style == 'VMD':
        colNames = ['key','segname','resid','name']
    elif style == 'MDA':
        colNames = ['key','segid','resid','name']
    else:
        # Previously this fell through and failed later with an unbound colNames.
        raise ValueError("Unknown style %r: expected 'VMD' or 'MDA'" % (style,))
    dataBlock = np.stack( (key, atomSel.segids,atomSel.resids,atomSel.names), axis=1)
    return pd.DataFrame( dataBlock, columns = colNames )

In [None]:
def basicTicker():
    """
    Return a fresh bokeh BasicTicker whose tick interval is pinned to exactly 1.
    """
    tickerOptions = dict(min_interval=1, max_interval=1, num_minor_ticks=1)
    return bokehModels.BasicTicker(**tickerOptions)

In [None]:
def plot_bokehfigure_vector(yVals, xVals=None, style='line', title="", pHeight=400, pWidth=1000):
    """
    Plot a single vector of values as a bokeh figure and return the figure.

    yVals   : sequence of y values.
    xVals   : optional sequence of x values; defaults to 0..len(yVals)-1.
    style   : 'line', 'points', 'vbar' or 'hbar'. Any other value returns a figure with no glyphs.
    title   : figure title.
    pHeight : figure height in pixels.
    pWidth  : figure width in pixels.
    """
    # Identity test: `xVals == None` is element-wise for numpy arrays and cannot be used in an if.
    if xVals is None:
        xVals = np.arange(len(yVals))
    source = bokehModels.ColumnDataSource(data=dict(x=xVals,y=yVals))

    p = bokehPlotting.figure(plot_width=pWidth, plot_height=pHeight, title=title)
    p.add_tools(bokehModels.HoverTool(tooltips=[('','@x,@y')]))
    p.toolbar.active_scroll = p.select_one(bokehModels.WheelZoomTool)
    if style == 'line':
        p.line(x="x", y="y", source=source)
    elif style == 'points':
        p.circle(x="x", y="y", source=source)
    elif style == 'vbar':
        g=bokehModels.VBar(x="x", top="y")
        p.add_glyph(source, g)
    elif style == 'hbar':
        g=bokehModels.HBar(right="x", y="y")
        p.add_glyph(source, g)
    return p

In [None]:
def plot_bokehfigure_matrix(m, tickLabels=None, bValues=False):
    """
    Draw the strictly positive entries of the 2D array m as coloured unit squares and return the figure.

    m          : 2D numeric array. Entry m[i,j] is drawn at x=i, y=j.
    tickLabels : optional sequence of labels used as the categorical range of both axes.
                 When given, coordinates are shifted by 0.5.
    bValues    : if True, print each value (formatted with no decimals) on top of its square.
    """
    pSize=600
    bLabelled = tickLabels is not None
    offset = 0.5 if bLabelled else 0.0

    # Collect the plotted cells locally, then hand them to bokeh in one go.
    xs=[] ; ys=[] ; zs=[] ; ls=[]
    for i in range(m.shape[0]):
        for j in range(m.shape[1]):
            if m[i,j]>0:
                xs.append( i+offset )
                ys.append( j+offset )
                zs.append( m[i,j] )
                ls.append( "%.0f" % m[i,j] )
    source = bokehModels.ColumnDataSource(data=dict(x=xs,y=ys,z=zs, l=ls))

    colors = np.flip(bokehPalettes.Viridis256)
    mapper = bokehModels.LinearColorMapper(palette=colors, low=np.min(m), high=np.max(m))
    # Same identity test as above: `tickLabels == None` fails for numpy arrays of labels.
    if bLabelled:
        p = bokehPlotting.figure(plot_width=pSize, plot_height=pSize, x_range=tickLabels, y_range=tickLabels)
    else:
        p = bokehPlotting.figure(plot_width=pSize, plot_height=pSize)

    p.add_tools(bokehModels.HoverTool(tooltips=[('','@z')]))
    p.xaxis.major_label_orientation = "vertical"
    p.toolbar.active_scroll = p.select_one(bokehModels.WheelZoomTool)
    p.rect(x="x", y="y", width=1, height=1, source=source, line_color=None, alpha=0.9,
           fill_color=bokehTransform.transform('z', mapper))
    if bValues:
        labels = bokehModels.LabelSet(x="x", y="y", text="l", source=source,
               x_offset=0, y_offset=0, text_align='center', text_baseline='middle', render_mode='canvas')
        p.add_layout(labels)
    return p

## Parameter setting for terminal and notebook formats

In [None]:
# Mode switch: False runs with the hard-coded notebook settings below; True reads the
# settings from the command line and writes plots to HTML files instead of the notebook.
bPythonExport = False

In [None]:
# Command-line mode: read the run parameters with argparse instead of the notebook settings.
if bPythonExport:
    import argparse
    parser = argparse.ArgumentParser(description='Perform graph spectral decomposition on a pre-computed DNAD file.',
                                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--num_vecs', type=int, dest='numVecs', default=4,
                        help='Number of eigenvectors to report. Noting that the first eigenvector is the canonical centrality, '
                             'with higher components denoting divisions into influential sub-domains.')
    parser.add_argument('-i', '--input', type=str, dest='inputPrefix', default=None, required=True,
                        help='Input prefix used to generate files from Step 1.')
    parser.add_argument('-o', '--output', type=str, dest='outputPrefix', default=None, required=True,
                        help='The prefix for all output files, included HTML visualisations aand data files')
    parser.add_argument('-r','--ref', type=str, dest='referencePrefix', default=None,
                        help='(Not currently used) An equivalent input prefix to include a reference Step 1 computation for comparison.')
    parser.add_argument('--title', type=str, dest='titleGraph', default='Spectral Decomposition',
                        help='Name of the graph titles to be shown in the output HTML.')
    
    args = parser.parse_args()
    
    # Populate the same variable names that the notebook-mode cells define, so that the
    # analysis cells further down work in both modes.
    fullPathRoot   = args.inputPrefix
    filePDBRef = fullPathRoot+"_reducedTraj.pdb"
    outputPrefix = args.outputPrefix
    nVecs = args.numVecs
    titleGraph     = args.titleGraph
    # Supplying a reference prefix switches on comparison mode and defines the "B" inputs.
    bComparison = False
    if args.referencePrefix is not None:
        fullPathRootB = args.referencePrefix
        filePDBRefB = fullPathRootB+"_reducedTraj.pdb"
        bComparison = True

In [None]:
# = = = Change to the relevant work folder
# Notebook mode only: pick the example system and change into its project folder.
# The %cd lines are IPython magics, so this cell only runs inside IPython/Jupyter.
# NOTE(review): the folder paths are specific to the original author's machine.
if not bPythonExport:
    # systemExample is read again by the parameter cell that follows.
    systemExample="periplasmic"
    if systemExample == "UbqCHARMM" or systemExample == "UbqTempProfile":
        %cd /home/zharmad/host/projects/Ubq-md
    elif systemExample == "UbqAthi":
        %cd /home/zharmad/host/shared-colleague/Ubq-2017
    elif systemExample == "periplasmic":
        %cd /home/zharmad/projects/periplasmic/leucine-binding_protein
    elif systemExample == "caspase-1":
        %cd /home/zharmad/host/projects/caspase-1
    elif systemExample == "CFTR":
        %cd /home/zharmad/projects/cftr/DyNetAn
    else:
        %cd ..

In [None]:
# Notebook mode only: hard-coded run parameters for each example system.
# Defines nVecs, the input path prefix (fullPathRoot), the reference PDB (filePDBRef),
# the output prefix, the graph title and the comparison switch used by later cells.
if not bPythonExport:
    nVecs=8
    outputDir=None
    if systemExample == "UbqCHARMM":
        # apo1 apo2 apo3
        state="apo1" ; dataDir = "./dynetan"        
        fileNameRoot = "%s_5x1000" % state
        stateB="apo2" ; dataDirB = dataDir
        fileNameRootB = "%s_5x1000" % stateB
    elif systemExample == "UbqTempProfile":
        # NOTE(review): this branch defines no stateB/dataDirB/fileNameRootB, so it
        # cannot be combined with bComparison = True below.
        state="Ubq" ; dataDir = "./dynetan"
        fileNameRoot = "ubq-Temp-profile"
        nameDataset = ["%iK" % (w*20+280) for w in range(5)]
    elif systemExample == "UbqAthi":
        # UbqI13V  UbqI23A  UbqI30A  UbqL43A  UbqL67A  UbqL69A  UbqV17A  UbqWT
        state="UbqL43A" ; dataDir = "./dynetan"
        fileNameRoot = "%s_5x200" % state
        stateB="UbqWT" ; dataDirB = dataDir
        fileNameRootB = "%s_5x200" % stateB
        nameConsensus ='L43A' ; nameConsensusB='WT'
    elif systemExample == "periplasmic":
        # apo holo
        state="holo" ; dataDir = "./dynetan"
        fileNameRoot = "%s_5x200" % state
        stateB="apo" ; dataDirB = dataDir
        fileNameRootB = "%s_5x200" % stateB
        nameConsensus ='holo/closed'
        nameConsensusB='apo/open'
    elif systemExample == "caspase-1":
        # NOTE(review): state and stateB are both "off-state" although the consensus
        # names below say On/Off - confirm whether state should be an on-state dataset.
        state="off-state" ; dataDir = "./dynetan"
        fileNameRoot = "%s_5x1000" % state
        stateB="off-state" ; dataDirB = dataDir
        fileNameRootB = "%s_5x1000" % stateB
        nameConsensus ='On state' ; nameConsensusB='Off state'
    elif systemExample == "CFTR":
        # Define mutant file IO locations. wt, P67L, E56K, R75Q, S945L, dF508, Q1291H, etc.
        state="dF508" ; temperature="310K"
        dataDir = "./results/%s/%s/" % (state, temperature)
        #Path where results will be written (you may want plots and data files in a new location)
        outputDir = "./results/%s/%s/analysis" % (state, temperature)
        fileNameRoot = "1to3"
        stateB="wt" ; dataDirB = "./results/%s/%s/" % (stateB, temperature)
        fileNameRootB = "1to6"
        
    else:
        # Unknown system name: abort execution of the notebook at this cell.
        raise KeyboardInterrupt
        
    #Path where results will be written (you may want plots and data files in a new location)
    if outputDir is None:
        outputDir = "./dynetan/analysis"
    fullPathRoot = os.path.join(dataDir, fileNameRoot)
    filePDBRef = fullPathRoot+"_reducedTraj.pdb"
    
    # outputDir is not currently used.
    outputPrefix = "./spectralDecomp"
    titleGraph  = "%s network" % state
    #if bComparison:
           
    # Comparison mode is switched off here, so the "B" inputs above are not loaded.
    bComparison = False
    if bComparison:
        fullPathRootB = os.path.join(dataDirB, fileNameRootB)
        filePDBRefB = fullPathRootB+"_reducedTraj.pdb"
        outputFileName = "./networkView-%s-comparison.html" % state
        
        titleGraph = "%s - %s comparison" % (state, stateB)

## Load the relevant data into DNAD objects

In [None]:
# Load the pre-computed network data and rebuild the node atom selection from the reference PDB.
print("= = = Loading input graph data...")
dnad = DNAdata()
# = = = loadFromFile will automatically output debug lines.
dnad.loadFromFile(fullPathRoot)
#mdU = mda.Universe( "./UbqWT/reference.pdb" )
mdU = mda.Universe( filePDBRef )
# Index the universe's atoms with the stored node indices to get one atom per node.
dnad.nodesAtmSel = mdU.atoms[ dnad.nodesIxArray ]

In [None]:
# Load the reference ("B") dataset when comparing; otherwise alias B to the primary
# dataset so that the B names always exist.
if bComparison:
    print("= = = Loading reference data...")
    dnadB = DNAdata()
    dnadB.loadFromFile(fullPathRootB)
    mdUB = mda.Universe( filePDBRefB )
    dnadB.nodesAtmSel = mdUB.atoms[ dnadB.nodesIxArray ]
else:
    dnadB = dnad
    mdUB = mdU

In [None]:
def compute_consensus_graph(listG):
    """
    Build one graph whose edge weights are the averages over a list of graphs.

    Assumes nodes are the same between all graphs in the list; nodes and their 'name'
    attribute are taken from listG[0], and every node gets communityID=0.
    Uses the union of all edges, rather than the intersection. Each edge weight is averaged
    over the graphs that contain that edge, not over the total number of graphs.
    The edge 'name' attribute is taken from the first graph in which the edge appears.

    Returns a new undirected networkx Graph.
    """
    outG = nx.Graph()
    # = = = Create Nodes and Edges = = =
    for x, c in listG[0].nodes.data('name', default=None):
        outG.add_node(x, name=c, communityID=0)

    dictWeights={} ; dictCounts={}
    for G in listG:
        for u, v, c in G.edges.data('name', default=None):
            # Undirected edges may be reported as (u,v) in one graph and (v,u) in another.
            # Reuse the orientation under which the edge was first stored, so that the
            # accumulators are never indexed with a key that does not exist.
            key = (v,u) if (v,u) in dictWeights else (u,v)
            if key not in dictWeights:
                outG.add_edge(u,v,name=c)
                dictWeights[key]=0.0
                dictCounts[key]=0
            dictWeights[key]+=G.edges[u,v]['weight']
            dictCounts[key]+=1
    for k, total in dictWeights.items():
        outG.edges[k]['weight'] = total/dictCounts[k]
    return outG

In [None]:
def _name_test_graph(G):
    """
    Give every node of a test graph the name 'X<index+1>'. Node labels must be integers.
    """
    for idx in G.nodes():
        label = 'X%i' % (idx+1)
        G.nodes[idx]['name'] = label

def _break_distal_edges(G, listNodes):
    """
    Within listNodes, remove every existing edge between two nodes whose labels differ by more than 1.
    Edges between consecutive labels are kept. Modifies G in place.
    """
    for a, b in combinations(listNodes,2):
        if np.fabs(b-a)<=1:
            continue
        if (a,b) in G.edges:
            G.remove_edge(a,b)
    
def build_test_graph(numNodes, style, wBackbone=0.95, wDistal=0.8):
    """
    Build a small model graph of numNodes nodes for testing the decomposition.

    Every style starts from a chain 0-1-...-(numNodes-1) with edge weight wBackbone, and
    nodes named by _name_test_graph().

    style : 'coil'          - the chain only.
            'helix'         - additionally joins each node i to i+2 and i+3 with weight wDistal.
            'anti-parallel' - additionally joins node i to node numNodes-i-1 with weight wDistal,
                              for i in the first half of the chain.
            Any other value returns the bare chain.

    Returns the graph, or None (after printing an error) when numNodes is too small for the
    requested motif: fewer than 4 for 'helix', fewer than 6 for 'anti-parallel'.
    """
    G = nx.Graph()
    for i in range(numNodes-1):
        G.add_edge(i,i+1, weight=wBackbone)
    _name_test_graph(G)

    if style == 'coil':
        # A model graph that is joined only to immediate neighbours.
        return G

    if style == 'helix':
        if numNodes<4:
            print("= = = ERROR: helix motif requires at least 4 nodes")
            return None
        for i in range(numNodes-1):
            for j in (i+2, i+3):
                if j < numNodes:
                    G.add_edge(i,j, weight=wDistal)
    elif style == 'anti-parallel':
        if numNodes<6:
            print("= = = ERROR: anti-parallel beta strand motif needs at least 6 nodes")
            return None
        # The backbone already exists from the chain above; only the cross-strand edges are added.
        for i in range(numNodes//2):
            j = numNodes-i-1
            # Skip the turn, where the partner is already the backbone neighbour.
            if j != i+1:
                G.add_edge(i, j, weight=wDistal)

    return G

## Get consensus graph and compute adjacency matrix alongside eigenvectors

In [None]:
# Average the graphs stored in dnad.nxGraphs into a single consensus graph, then attach
# atom metadata and display names to its nodes.
GConsensus = compute_consensus_graph(dnad.nxGraphs)
#for n,d in dnad.nxGraphs[0].nodes(data='name'):
#    GConsensus.nodes[n]['name']=d
name_graph_nodes_from_MDAnalysis( GConsensus, dnad.nodesAtmSel )

In [None]:
# Disabled by default. Change the condition to True to replace the consensus graph with a
# synthetic 40-node model graph.
# NOTE(review): later cells still build dfTopology from dnad.nodesAtmSel, so the real
# dataset must have a matching node count for those cells to work - confirm before enabling.
if False:
    testType='helix'
    GConsensus = build_test_graph(40, style=testType)
    nVecs = 6 ; titleGraph = 'theorical model of %s' % testType
    
    # = = =Break helix in two
    _break_distal_edges(GConsensus, np.arange(17,27))
    # = = = Add interhelix edges.
    #GConsensus.add_edge(10,38,weight=0.4)
    #GConsensus.add_edge(13,34,weight=0.4)
    
    #GConsensus.edges[20,21]['weight']=0.4
    #GConsensus.edges[22,23]['weight']=0.4
    

In [None]:
# Disabled by default. Change the condition to True to show the weighted adjacency matrix
# of the consensus graph as a heat map, with the node names as tick labels.
if False:
    output_notebook()    
    fig = plot_bokehfigure_matrix(nx.convert_matrix.to_numpy_array(GConsensus, weight='weight', nonedge=0.0),
                              tickLabels=[ d for x,d in GConsensus.nodes(data='name')])
    show(fig)

In [None]:
# Node display names in graph iteration order; used below as row index and categorical x-axis.
listNames = [ d for x,d in GConsensus.nodes(data='name') ]

In [None]:
# Per-node table (key, segname, resid, name) that is merged into every exported CSV.
dfTopology = build_structure_dataframe(dnad.nodesAtmSel, listNames)

In [None]:
# Dense weighted adjacency matrix of the consensus graph; absent edges are stored as 0.0.
adjMat   = nx.convert_matrix.to_numpy_array(GConsensus, weight='weight', nonedge=0.0)
nNodes = GConsensus.number_of_nodes()

In [None]:
# One colour per reported eigenvector.
palette = bokehPalettes.viridis(nVecs)
# Symmetric eigendecomposition. Both outputs are reversed so that index 0 corresponds to
# the largest eigenvalue (eigh returns them in ascending order).
w, v = np.linalg.eigh(adjMat)
w = np.flip(w)
v = np.flip(v,axis=1)
# = = = Normalise v_lambda for net positive
# Only the first nVecs eigenvectors are sign-normalised; the remaining columns are left as returned.
for i in range(nVecs):
    if np.sum(v[:,i])<0:
        v[:,i] *= -1

In [None]:
# Store a Louvain partition on the nodes under the attribute 'modularity'.
encode_community_into_graph(GConsensus, nameAttr='modularity')
# Largest community ID found. NOTE(review): if IDs start at 0 this is the number of
# communities minus one, despite the variable name - confirm.
numLouvain=max([d for k,d in GConsensus.nodes(data='modularity')])

In [None]:
# Overview figures: eigenvalue spectrum, excess volume of every eigenvector, the first
# eigenvector, and the Louvain community ID of every node.
fig1 = plot_bokehfigure_vector(w, style='vbar', pHeight=300,title='Eigenvalues of spectral decomposition')
fig2 = plot_bokehfigure_vector(compute_excess_volume(v), style='vbar', pHeight=300, title='Eigenvector excess volume')
#fig3 = plot_bokehfigure_vector(np.argmax(v,axis=0), style='points', pHeight=300, title='Eigenvector maxima')
fig3 = plot_bokehfigure_vector(v[:,0], style='line', pHeight=300, title='1st eigenvector (i.e. centrality)')
fig4 = plot_bokehfigure_vector([d for k,d in GConsensus.nodes(data='modularity')],
                               style='points', pHeight=300, title='Default Louvain Community ID via community.best_partition()')
# Render inline in notebook mode, or to an HTML file in command-line mode.
if not bPythonExport:
    output_notebook()
else:
    output_file(outputPrefix+'_basics.html')
show(bokehModels.Column(fig1, fig2, fig3, fig4))

In [None]:
# Export the Louvain community ID of every node alongside its topology columns.
# The community frame is indexed by node name and joined on the 'key' column.
dfMerge = pd.merge( dfTopology, pd.DataFrame([d for k,d in GConsensus.nodes(data='modularity')], index=listNames),
                   left_on='key', right_index=True )
dfMerge.to_csv(outputPrefix+'_LouvainCommunities.csv', index=False)

## Compute, Export, and plot eigenvectors from the spectral decomposition

In [None]:
# First nVecs eigenvectors as columns, one row per node, indexed by node name.
dataFrame = pd.DataFrame( v[:,:nVecs], index=listNames)

In [None]:
# Export the eigenvector components alongside the topology columns of each node.
dfMerge = pd.merge( dfTopology, dataFrame, left_on='key', right_index=True )
dfMerge.to_csv(outputPrefix+'_eigenvectors.csv', index=False)

In [None]:
# Line plot of the first nVecs eigenvectors against the node names.
# The data source holds one column per eigenvector (v0, v1, ...) plus the node names.
source = bokehModels.ColumnDataSource(data=dict())
source.data['name'] = listNames
listLegend = []
for i in range(nVecs):
    nameField='v%i'%i
    listLegend.append(nameField)
    #listLegend.append('%s ; \N{GREEK SMALL LETTER LAMDA}=%.3g' % (nameField, w[i]) )
    source.data[nameField] = v[:,i]
    #source.data[nameField] = np.sign(v[:,i])
    
fig = bokehPlotting.figure(width=1000, height=400, title=titleGraph,
                           x_axis_label='Node ID', y_axis_label='eigenvector',
                           x_range=listNames,
                           tools = "pan,wheel_zoom,box_zoom,box_select,tap,save,reset,help")
fig.xaxis.major_label_orientation = "vertical"
fig.toolbar.active_scroll = fig.select_one(bokehModels.WheelZoomTool)

for i in range(nVecs):
    nameField='v%i'%i
    fig.line('name', nameField, legend_label=listLegend[i], source=source,line_width=1, line_color=palette[i])
# Clicking a legend entry toggles the visibility of that eigenvector.
fig.legend.click_policy="hide"    

tooltips = [("Name", "@name")]
fig.add_tools(bokehModels.HoverTool(tooltips=tooltips))

# Render inline in notebook mode, or to an HTML file in command-line mode.
if not bPythonExport:
    output_notebook()
else:
    output_file(outputPrefix+'_eigenvectors.html')
    
show(fig)

## Compute, export and plot hierarchical divisions in sequence space

In [None]:
# Sign-pattern group labels per node (rows) and hierarchy level (columns), compressed to consecutive integers.
dataFrame = pd.DataFrame( extract_spectral_heirarchy(v,nVecs, bCompress=True), index=listNames)

In [None]:
# Prepare the colour palette and data sources for the hierarchy plot.
# NOTE: the axes of dataFrame are deliberately left unnamed. rename_axis() returns a new frame
# rather than renaming in place, and the stacked source below must keep pandas' default
# 'level_0'/'level_1' column names because the plotting cell refers to them.
maxColors = dataFrame.max().max()+1
palette2 = bokehPalettes.viridis(min(maxColors,256))

# Put the Louvain IDs on the same colour scale as the hierarchy labels. The scale factor stays 1
# when Louvain found a single community (numLouvain == 0), which would otherwise divide by zero.
cMax=max(dataFrame.max().max(),numLouvain)
fact=1
if numLouvain>0 and cMax>numLouvain:
    fact=cMax/numLouvain
# Long-format source: one row per (node, level) pair with its group membership.
source = bokehModels.ColumnDataSource( pd.DataFrame(dataFrame.stack(), columns=['membership']).reset_index() )
# Extra row at y = nVecs showing the rescaled Louvain community of every node for comparison.
source2 = bokehModels.ColumnDataSource( data=dict(x=list(dataFrame.index), y=[nVecs]*nNodes,
                                                  level_0=listNames,
                                                  membership=[int(fact*d) for k,d in GConsensus.nodes(data='modularity')]) )
mapper = bokehModels.LinearColorMapper(palette=palette2, low=-0.5, high=cMax+0.5)

In [None]:
# Export the hierarchy labels alongside the topology columns of each node.
dfMerge = pd.merge( dfTopology, dataFrame, left_on='key', right_index=True )
dfMerge.to_csv(outputPrefix+'_node_heirarchy.csv', index=False)

In [None]:
# Heat map of group membership: node names on x, hierarchy level on y, colour = group.
# The row at y = nVecs shows the Louvain communities and is labelled 'Louvain'.
fig = bokehPlotting.figure(width=1000, height=400, title=titleGraph,
                           x_axis_label='Node ID', y_axis_label='Heirarchy level',
                           x_range=list(dataFrame.index),
                           tools = "pan,wheel_zoom,box_zoom,box_select,tap,save,reset,help")
fig.grid.grid_line_color = None
fig.axis.axis_line_color = None
fig.xaxis.major_label_orientation = "vertical"
fig.yaxis.ticker = np.arange(0,nVecs+1)
fig.yaxis.major_label_overrides = {nVecs: 'Louvain'}
fig.axis.major_tick_line_color = None
fig.toolbar.active_scroll = fig.select_one(bokehModels.WheelZoomTool)

# 'level_0' and 'level_1' are the default column names produced by stack().reset_index()
# on a frame whose axes are unnamed.
cir = fig.rect(x="level_0", y="level_1", width=1, height=1,source=source,fill_color={'field': 'membership', 'transform': mapper},line_color=None)
cir2 = fig.rect(x="x", y="y", width=1, height=1,source=source2,
               fill_color={'field': 'membership', 'transform': mapper},line_color=None)
cbar = bokehModels.ColorBar(title='Group Membership', color_mapper=mapper, border_line_color=None, location=(0, 0))
fig.add_layout(cbar, 'right')

tooltips = [("Name", "@level_0"),("Member","@membership")]
fig.add_tools(bokehModels.HoverTool(tooltips=tooltips))

# Render inline in notebook mode, or to an HTML file in command-line mode.
if not bPythonExport:
    output_notebook()
else:
    output_file(outputPrefix+'_node_heirarchy.html')
    
show(fig)

## Compute, export and plot spectral clustering

In [None]:
# Spectral-clustering labels per node; the column labelled n holds the result for n clusters.
# Column 1 is all zeros, because extract_spectral_clustering() only fits 2 or more clusters.
dataFrame = pd.DataFrame(extract_spectral_clustering(adjMat, nVecs),
                         index=listNames,
                         columns=range(1,nVecs+1)
                        )

In [None]:
# Prepare the colour palette and data sources for the spectral-clustering plot.
# NOTE: the axes of dataFrame are deliberately left unnamed. rename_axis() returns a new frame
# rather than renaming in place, and the stacked source below must keep pandas' default
# 'level_0'/'level_1' column names because the plotting cell refers to them.
maxColors = dataFrame.max().max()+1
palette2 = bokehPalettes.viridis(min(maxColors,256))
# Put the Louvain IDs on the same colour scale as the cluster labels. The scale factor stays 1
# when Louvain found a single community (numLouvain == 0), which would otherwise divide by zero.
cMax=max(dataFrame.max().max(),numLouvain)
fact=1
if numLouvain>0 and cMax>numLouvain:
    fact=cMax/numLouvain
# Long-format source: one row per (node, cluster count) pair with its cluster membership.
source  = bokehModels.ColumnDataSource( pd.DataFrame(dataFrame.stack(), columns=['membership']).reset_index() )
# Extra row at y = nVecs+1 showing the rescaled Louvain community of every node for comparison.
source2 = bokehModels.ColumnDataSource( data=dict(x=list(dataFrame.index), y=[nVecs+1]*nNodes,
                                                  level_0=listNames,
                                                  membership=[int(fact*d) for k,d in GConsensus.nodes(data='modularity')]) )
mapper = bokehModels.LinearColorMapper(palette=palette2, low=-0.5, high=cMax+0.5)

In [None]:
# Export the spectral-cluster labels alongside the topology columns of each node.
dfMerge = pd.merge( dfTopology, dataFrame, left_on='key', right_index=True )
dfMerge.to_csv(outputPrefix+'_spectral_clusters.csv', index=False)

In [None]:
# Heat map of cluster membership: node names on x, number of clusters on y, colour = cluster.
# The row at y = nVecs+1 shows the Louvain communities and is labelled 'Louvain'.
fig = bokehPlotting.figure(width=1000, height=400, title=titleGraph,
                           x_axis_label='Node ID', y_axis_label='Spectral Clusters',
                           x_range=list(dataFrame.index),
                           tools = "pan,wheel_zoom,box_zoom,box_select,tap,save,reset,help")
fig.grid.grid_line_color = None
fig.axis.axis_line_color = None
fig.xaxis.major_label_orientation = "vertical"
#fig.yaxis.ticker = basicTicker()
fig.yaxis.ticker = np.arange(1,nVecs+2)
fig.yaxis.major_label_overrides = {nVecs+1: 'Louvain'}
fig.axis.major_tick_line_color = None
fig.toolbar.active_scroll = fig.select_one(bokehModels.WheelZoomTool)

# 'level_0' and 'level_1' are the default column names produced by stack().reset_index()
# on a frame whose axes are unnamed.
cir  = fig.rect(x="level_0", y="level_1", width=1, height=1,source=source,
               fill_color={'field': 'membership', 'transform': mapper},line_color=None)
cir2 = fig.rect(x="x", y="y", width=1, height=1,source=source2,
               fill_color={'field': 'membership', 'transform': mapper},line_color=None)
cbar = bokehModels.ColorBar(title='Group Membership', color_mapper=mapper, border_line_color=None, location=(0, 0))
fig.add_layout(cbar, 'right')

tooltips = [("Name", "@level_0"),("Member","@membership")]
fig.add_tools(bokehModels.HoverTool(tooltips=tooltips))

# Render inline in notebook mode, or to an HTML file in command-line mode.
if not bPythonExport:
    output_notebook()
else:
    output_file(outputPrefix+'_spectral_clusters.html')
    
show(fig)

## Compute, export and plot eigenvector overlaps

In [None]:
# Pairwise overlap matrix between the first nVecs eigenvectors.
dataFrame = pd.DataFrame( compute_eigenvector_overlap(v,nVecs) )
# NOTE(review): rename_axis() returns a new frame and the results are not assigned, so
# dataFrame itself keeps unnamed axes. The plotting cell below relies on that, because it
# reads the default 'level_0'/'level_1' columns. In a notebook the final expression is only displayed.
dataFrame.rename_axis('eigenvector', axis=1)
dataFrame.rename_axis('eigenvector', axis=0)

In [None]:
# Export the overlap matrix, including its row index.
dataFrame.to_csv(outputPrefix+'_overlap.csv', index=True)

In [None]:
# Long-format source for the overlap heat map: one row per eigenvector pair.
# The colour scale is fixed to the range 0..1.
palette3 = bokehPalettes.viridis(256)
source = bokehModels.ColumnDataSource( pd.DataFrame(dataFrame.stack(), columns=['overlap']).reset_index() )
mapper = bokehModels.LinearColorMapper(palette=palette3, low=0.0, high=1.0)

In [None]:
# Heat map of eigenvector overlaps, with eigenvector indices on both axes.
fig = bokehPlotting.figure(width=500, height=400, title=titleGraph,
                           x_axis_label='eigenvector', y_axis_label='eigenvector',
                           tools = "pan,wheel_zoom,box_zoom,box_select,tap,save,reset,help")
fig.grid.grid_line_color = None
fig.axis.axis_line_color = None
fig.axis.major_tick_line_color = None
# Integer-spaced ticks, one per eigenvector index.
fig.xaxis.ticker = basicTicker()
fig.yaxis.ticker = basicTicker()

cir = fig.rect(x="level_0", y="level_1", width=1, height=1,source=source,fill_color={'field': 'overlap', 'transform': mapper},line_color=None)

cbar = bokehModels.ColorBar(title='Overlap', color_mapper=mapper, border_line_color=None, location=(0, 0))
fig.add_layout(cbar, 'right')

tooltips = [("Pair","@level_0,@level_1"),("Overlap", "@overlap")]
fig.add_tools(bokehModels.HoverTool(tooltips=tooltips))

In [None]:
# Render the overlap figure inline in notebook mode, or to an HTML file in command-line mode.
if not bPythonExport:
    output_notebook()
else:
    output_file(outputPrefix+'_overlap.html')
    
show(fig)

## Compute, export and plot excess volumes

In [None]:
# Excess volume, sum(v_i)/sum(|v_i|), of each of the first nVecs eigenvectors.
values = compute_excess_volume(v,nVecs)

In [None]:
# Bar chart of the excess volume per eigenvector.
source = bokehModels.ColumnDataSource( data=dict(x=np.arange(nVecs), y=values) )

# The y-range is fixed to 0..1. The first nVecs eigenvectors were sign-normalised to a
# non-negative sum earlier, so their excess volume cannot be negative.
fig = bokehPlotting.figure(width=400, height=400, title=titleGraph,
                           x_axis_label='eigenvector', y_axis_label='Excess volume',
                           y_range=(0,1),
                           tools = "pan,wheel_zoom,box_zoom,box_select,tap,save,reset,help")

glyph = bokehModels.VBar(x="x", top="y", bottom=0, width=0.5, fill_color="#b3de69")
fig.add_glyph(source, glyph)

# Render inline in notebook mode, or to an HTML file in command-line mode.
if not bPythonExport:
    output_notebook()
else:
    output_file(outputPrefix+'_excess.html')
    
show(fig)

In [None]:
# List of partition dictionaries, one per dendrogram level. The cell below indexes l[0] by
# node and each later level by the community ID of the level before it.
l = community.generate_dendrogram(GConsensus)

In [None]:
# Build a (node x dendrogram level) table of community IDs, rescaled so that every level
# spans roughly the same numeric range as the first level (for a shared colour scale).
cMaxInit=0 ; scaleFactor = []
for partition in l:
    cMax = max(partition.values())
    if cMaxInit == 0:
        cMaxInit = cMax
    # A level with a single community has cMax == 0. All of its IDs are 0, so the factor is
    # irrelevant there; using 1 avoids a division by zero.
    scaleFactor.append( cMaxInit/cMax if cMax > 0 else 1 )

# Follow each node up the dendrogram: its community at level j is looked up from its
# community at level j-1. Assumes the graph nodes are labelled 0..nNodes-1.
arr = []
for k0 in range(nNodes):
    k = k0
    branchNode = []
    for j, partition in enumerate(l):
        k = partition[k]
        branchNode.append( int(k*scaleFactor[j]) )
    arr.append(branchNode)
dataFrame = pd.DataFrame( arr, index=listNames )

In [None]:
# Prepare the colour palette and long-format data source for the dendrogram plot.
# NOTE(review): rename_axis() returns a new frame and the results are not assigned, so these
# two calls have no effect. The plotting cell below relies on that, because it reads the
# default 'level_0'/'level_1' column names produced by stack().reset_index().
dataFrame.rename_axis('Louvain Dendrogram', axis=1)
dataFrame.rename_axis('Node ID', axis=0)
maxColors = dataFrame.max().max()+1
palette2 = bokehPalettes.viridis(min(maxColors,256))
source  = bokehModels.ColumnDataSource( pd.DataFrame(dataFrame.stack(), columns=['membership']).reset_index() )
mapper = bokehModels.LinearColorMapper(palette=palette2, low=-0.5, high=dataFrame.max().max()+0.5)

In [None]:
# Heat map of the Louvain dendrogram: node names on x, dendrogram level on y, colour = community.
fig = bokehPlotting.figure(width=1000, height=400, title=titleGraph,
                           x_axis_label='Node ID', y_axis_label='Louvain Dendrogram',
                           x_range=listNames,
                           tools = "pan,wheel_zoom,box_zoom,box_select,tap,save,reset,help")
fig.grid.grid_line_color = None
fig.axis.axis_line_color = None
fig.xaxis.major_label_orientation = "vertical"
# One tick per dendrogram level. The levels are the columns 0..nLevels-1 of dataFrame, which
# are unrelated to nVecs, and there is no separate 'Louvain' comparison row in this figure.
fig.yaxis.ticker = np.arange(0,dataFrame.shape[1])
fig.axis.major_tick_line_color = None
fig.toolbar.active_scroll = fig.select_one(bokehModels.WheelZoomTool)

# 'level_0' and 'level_1' are the default column names produced by stack().reset_index()
# on a frame whose axes are unnamed.
cir  = fig.rect(x="level_0", y="level_1", width=1, height=1,source=source,
               fill_color={'field': 'membership', 'transform': mapper},line_color=None)
cbar = bokehModels.ColorBar(title='Group Membership', color_mapper=mapper, border_line_color=None, location=(0, 0))
fig.add_layout(cbar, 'right')

tooltips = [("Name", "@level_0"),("Member","@membership")]
fig.add_tools(bokehModels.HoverTool(tooltips=tooltips))

# Render inline in notebook mode, or to an HTML file in command-line mode.
if not bPythonExport:
    output_notebook()
else:
    output_file(outputPrefix+'_louvain_dendrogram.html')

show(fig)