In [None]:
# Load the python package
import os
from dynetan.toolkit import *
from dynetan.viz import *
from dynetan.proctraj import *
from dynetan.gencor import *
from dynetan.contact import *
from dynetan.datastorage import *

from MDAnalysis.analysis import distances as MDAdistances
#from numpy.linalg import norm
#from itertools import islice
from itertools import combinations
from scipy import stats
from collections import OrderedDict

import networkx as nx
import numpy as np
import scipy as sp

In [None]:
# For visualization
from bokeh.io import output_file, output_notebook, push_notebook, show
from bokeh import models as bokehModels
from bokeh import transform as bokehTransform
from bokeh import layouts as bokehLayouts
from bokeh import plotting as bokehPlotting
from bokeh import palettes as bokehPalettes
from bokeh import events as bokehEvents
# For pre-calculating Cartesian distances based on 2D embedding
from sklearn.manifold import MDS

In [None]:
# Lookup table from residue name to the short label used when naming graph nodes.
# HIS, HSD and HSE all collapse onto 'H'; the last row holds the non-amino-acid entries.
mapResidueNames = {
    'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',
    'GLY': 'G', 'HIS': 'H', 'HSD': 'H', 'HSE': 'H',
    'ILE': 'I', 'LYS': 'K', 'LEU': 'L',
    'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',
    'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',
    'MG': 'Mg', 'ATP': 'Atp', 'POPC': 'Popc', 'SOL': 'h2o',
}

def name_node(dnad, node):
    """
    Build the display label of one node: short residue label followed by the residue id.

    The atom is taken from dnad.nodesAtmSel[node]; its resname is translated through
    mapResidueNames (a KeyError is raised for residue names missing from that table)
    and its resid is appended.
    """
    atom = dnad.nodesAtmSel[node]
    resname, resid = atom.resname, atom.resid
    return "%s%s" % (mapResidueNames[resname], resid)

def clarify_duplicate_nodes(dictNames, dictSuffix):
    """
    Disambiguate entries of dictNames that share the same value.

    From two dicts with the same keys, every key of dictNames whose value also
    appears under at least one other key gets its value replaced by
    "<value>_<dictSuffix[key]>". Values that occur only once are left untouched.

    Parameters
    ----------
    dictNames : dict
        Maps keys to name strings. Updated in place; values must be hashable.
    dictSuffix : dict
        Maps the same keys to the string suffix appended to duplicated names.

    Returns
    -------
    dict
        The same dictNames object that was passed in, after the update.
        An empty input is returned unchanged.
    """
    # Invert the mapping: name -> set of keys that carry that name.
    dictRev = {}
    for k, v in dictNames.items():
        dictRev.setdefault(v, set()).add(k)
    # Scan for duplicates once, after the inversion is complete. Doing this inside
    # the loop above repeated the scan for every entry and left the result
    # undefined when dictNames was empty.
    for setKeys in dictRev.values():
        if len(setKeys) > 1:
            for k in setKeys:
                dictNames[k] = dictNames[k] + "_" + dictSuffix[k]
    return dictNames

In [None]:
def load_all_graphs(workingFolder):
    """
    Placeholder for loading all results; not implemented.

    The body consists of this docstring only, so a call ignores workingFolder
    and returns None.
    """

In [None]:
def simplify_graph(G, attr='segid', listIgnoredNodes=None):
    """
    Collapse a graph so that all nodes sharing the same value of a node attribute become one node.

    Parameters
    ----------
    G : networkx graph
        Input graph. Every node that is an endpoint of an edge must carry `attr`,
        and every edge is read through its 'weight' attribute.
    attr : str
        Name of the node attribute whose values become the nodes of the output graph.
    listIgnoredNodes : iterable or None
        Attribute values to drop from the output graph after it has been built.
        None (the default) removes nothing.

    Returns
    -------
    networkx.Graph
        One node per attribute value, with node attribute 'count' holding the number
        of input nodes merged into it. Each edge 'weight' is the sum of the input edge
        weights between the two groups; edges inside one group become a self-edge.
    """
    outG = nx.Graph()
    # Count how many input nodes fall into each group.
    for _, group in G.nodes(data=attr):
        if not outG.has_node(group):
            outG.add_node(group, count=0)
        outG.nodes[group]['count'] += 1
    # Accumulate edge weights between groups.
    for u, v, w in G.edges(data='weight'):
        k1 = G.nodes[u][attr]
        k2 = G.nodes[v][attr]
        if not outG.has_edge(k1, k2):
            outG.add_edge(k1, k2, weight=0.0)
        outG.edges[k1, k2]['weight'] += w

    # = = = Easier to remove nodes post-fact rather than going through all the if-statements.
    for group in (listIgnoredNodes or ()):
        if outG.has_node(group):
            outG.remove_node(group)

    return outG

In [None]:
# Switch read by the configuration cell: its notebook-only setup (%cd and the
# allele/path definitions) runs only while this flag is False.
bPythonExport = False

In [None]:
# Notebook-only configuration: working directory, alleles to load, and file locations.
# NOTE(review): nothing in this cell is defined when bPythonExport is True; the later
# cells read temperature, listAlleles and fileImportPos unconditionally.
if not bPythonExport:
    # NOTE(review): %cd is relative, so re-running this cell moves up another directory level.
    %cd ..
    # Define mutant file IO locations. wt, P67L, E56K, R75Q, S945L, dF508
    temperature="310K"
    #listAlleles = ['wt','dF508','N1303K']
    # Alleles to load; each one is looked up under ./results/<allele>/<temperature>/.
    listAlleles = ['wt',
                  'R117C','R117H','E92K','E56K','G85E','I148T','R74W','R75Q','P67L','E60K','M1101K',
                  'R31C','D836Y','E826K',
                  'L927P','R347H','R347P',
                  'S945L','M952T',
                  'D1152H',
                  'Q1291F','Q1291H','G551D','G551S','S549R','D529N','F575Y',
                  'V520F','R560T','D513G','dF508','R1066C','R1066H',
                  'A455E','D614G','L467P',
                  'S1235R',
                  'N1303K'
                 ]
    #outputFileName = "./results/networkView_%s_%s.html" % (allele, temperature)
    outputFileName = "./networkCompare.html"
    
    # Reference 2D node positions read by the position-import cell (set to None to skip that cell's body).
    fileImportPos  = './CFTRGraphReferencePositions.txt'
    fileExportPos  = './temp.txt'
    #fileClusterDefinitions = None
    fileClusterDefinitions = './Stable_Solvent_Clustering.cluster_definitions_d3.5_r0.50.txt'

In [None]:
# Load the stored network data of every allele and reduce each window's graph to one
# node per segid. The reduced graphs accumulate in listG, with matching labels
# "<allele>_<window index>" in listNames.
print("= = = Loading input graph data from all input paths...")
listG=[] ; listNames=[]
for allele in listAlleles:
    dataDir = "./results/%s/%s/" % (allele, temperature)
    #Path where results will be written (you may want plots and data files in a new location)
    # NOTE(review): workDir is assigned but not read anywhere in this cell.
    workDir = "./results/%s/%s/analysis" % (allele, temperature)
    # The wild type uses the file root "1to6"; every other allele uses "1to3".
    if allele == 'wt':
        fileNameRoot = "1to6"
    else:
        fileNameRoot = "1to3"
    fullPathRoot = os.path.join(dataDir, fileNameRoot)
    
    # Alleles without a "<root>.hf" file are reported and skipped.
    if not os.path.exists(fullPathRoot+'.hf'):
        print("...path %s does not contain results files? Skipping." % fullPathRoot)
        continue
    print("...loading from path %s:" % fullPathRoot)
    
    dnad = DNAdata()
    # = = = loadFromFile will automatically output debug lines.
    dnad.loadFromFile(fullPathRoot)
    #dcdVizFile = fullPathRoot + "_reducedTraj.dcd"
    pdbVizFile = fullPathRoot + "_reducedTraj.pdb"
    #mdU = mda.Universe(pdbVizFile,dcdVizFile)
    # NOTE(review): `mda` is not imported explicitly in the visible import cells; presumably
    # it arrives through one of the dynetan star imports -- confirm.
    mdU = mda.Universe(pdbVizFile)
    # Atom selection of the graph nodes, indexed by node number.
    dnad.nodesAtmSel = mdU.atoms[ dnad.nodesIxArray ]
   
    # = = = Make a simpler representation via segid, and exclude crystallographic waters as they don't comprise a cohesive node.
    for w in range(dnad.numWinds):
        
        # = = = = Temporary code. Segid was somehow not set in the wildtype 1to6 package.
        # Set the name of the node for Hover display. Append atom names to residues that have multiple nodes.
        nodeNames={} ; nodeSegIDs={} ; nodeAtomNames={}
        for x in dnad.nxGraphs[w].nodes():
            #i=dnap.nodesAtmSel[x].index
            nodeNames[x]  = name_node(dnad, x)
            nodeSegIDs[x] = dnad.nodesAtmSel[x].segid
            nodeAtomNames[x] = dnad.nodesAtmSel[x].name
        nodeNames = clarify_duplicate_nodes( nodeNames, nodeAtomNames )
        nx.set_node_attributes(dnad.nxGraphs[w], nodeNames, "name")
        nx.set_node_attributes(dnad.nxGraphs[w], nodeSegIDs, "segid")          
        
        # Collapse to one node per segid and drop the 'CRY' group.
        G = simplify_graph(dnad.nxGraphs[w], 'segid', ['CRY'])
        listG.append(G)
        # NOTE(review): after the loop, `dnad` stays bound to the last allele that was loaded;
        # the position-import cell reads dnad.nxGraphs[0] from that leftover object.
        listNames.append('%s_%i' % (allele, w))    
        

In [None]:
if fileImportPos is not None:
    # = = = Cheat with resid by eliminating the first letter.
    # Only rows with exactly three whitespace-separated fields (label, x, y) are used;
    # the label is stored without its first character.
    refPosList = {}
    with open(fileImportPos, 'r') as fp:
        for line in fp:
            fields = line.split()
            if len(fields) == 3:
                refPosList[fields[0][1:]] = [float(fields[1]), float(fields[2])]

    # Collect the reference positions of all matching nodes, grouped by segid ...
    posNodes = {}
    graphRef = dnad.nxGraphs[0]
    for a, name in graphRef.nodes(data='name'):
        s = graphRef.nodes[a]['segid']
        labelStripped = name[1:]
        if labelStripped in refPosList:
            posNodes.setdefault(s, []).append(refPosList[labelStripped])
    # ... and place each segid at the mean of its members' positions.
    posNodes = {k: np.mean(v, axis=0) for k, v in posNodes.items()}
    bPosSet = True

In [None]:
def arrange_self_edges(G, CDS, pos):
    """
    Fill a ColumnDataSource with the data needed to draw the self-edges of a graph.

    For every edge of G that joins a node to itself, one entry is appended to each of
    the columns 'x', 'y', 'a1', 'a2' and 'weight' of CDS.data: the node position taken
    from pos, a wedge of total width pi/2 given as start and end angle, and the edge weight.
    The wedge is centred on the direction pointing from the mean position of the node's
    neighbours towards the node itself, so as to lessen overlap with the existing edge rays.
    """
    halfWidth = np.pi/4
    columns = CDS.data
    for nodeA, nodeB, weight in G.edges(data='weight'):
        if nodeA != nodeB:
            continue
        centre = pos[nodeA]
        columns['x'].append(centre[0])
        columns['y'].append(centre[1])
        # Direction away from the centroid of the neighbouring nodes.
        centroid = np.mean([pos[w] for w in G.neighbors(nodeA)], axis=0)
        away = centre - centroid
        angle = np.arctan2(away[1], away[0])
        columns['a1'].append(angle - halfWidth)
        columns['a2'].append(angle + halfWidth)
        columns['weight'].append(weight)

In [None]:
def return_copy_without_self_edges(G):
    """
    Return a copy of G from which every edge joining a node to itself has been removed.

    G itself is not modified.
    """
    outG = G.copy()
    nodesWithLoop = [n for n in outG.nodes() if (n, n) in outG.edges()]
    for n in nodesWithLoop:
        outG.remove_edge(n, n)
    return outG

In [None]:
def get_node_data_range(G, nodeAttr):
    """
    Return the (minimum, maximum) of the node attribute nodeAttr taken over all nodes of G.
    """
    attrValues = []
    for n in G.nodes():
        attrValues.append(G.nodes[n][nodeAttr])
    lowest = np.min(attrValues)
    highest = np.max(attrValues)
    return lowest, highest

def get_node_color_label_map(G):
    """
    Return the distinct node labels of G as a numpy array, ordered by first appearance.

    The position of a label in the returned array serves as its colour index.
    """
    labels = list(G.nodes())
    # np.unique returns, for each distinct label, the index of its first occurrence;
    # sorting those indices restores the original order.
    firstSeen = np.sort(np.unique(labels, return_index=True)[1])
    return np.array([labels[j] for j in firstSeen])

def format_graph_nodes_by_palette(G, palette, nullColour='#FFFFFF'):
    """
    Store a colour on every node of G under the node attribute "node_color".

    Each node's colour is the palette entry at the node's index in the label map
    returned by get_node_color_label_map(G). Nodes whose index lies beyond the end
    of the palette receive nullColour instead (the palette is not wrapped around).
    """
    nColours = len(palette)
    labelMap = get_node_color_label_map(G)

    nodeColors = {}
    for n in G.nodes():
        rank = np.where(labelMap == n)[0][0]
        nodeColors[n] = palette[rank] if rank < nColours else nullColour
    nx.set_node_attributes(G, nodeColors, "node_color")

In [None]:
# Categorical node palette: red, the eight Colorblind colours, then grey (ten entries).
# Every simplified graph is coloured with it in place.
colourPaletteCat = ['#FF0000'] + list( bokehPalettes.Colorblind[8] ) + ['#666666']
for gg in listG:
    format_graph_nodes_by_palette(gg, colourPaletteCat)
#format_graph_edges_by_palette(G, colourPaletteLin)

In [None]:
def rescale_linear(dMin=0, dMax=1):
    """
    Create a Bokeh CustomJSTransform that linearly rescales a data column into [dMin, dMax].

    The smallest value of the column maps to dMin and the largest to dMax. When all
    values of the column are equal there is no range to stretch, so every element is
    mapped to the midpoint of [dMin, dMax] instead of dividing by zero (which would
    yield NaN for every element).

    Parameters
    ----------
    dMin, dMax : number
        Lower and upper bound of the output range.

    Returns
    -------
    bokeh CustomJSTransform
        Transform whose vectorised function (v_func) performs the rescaling in the browser.
    """
    r = dict(min=dMin,max=dMax)
    vfunc = """
        const norm = new Float64Array(xs.length)
        const min = Math.min(...xs)
        const max = Math.max(...xs)
        const span = max - min
        for (let i = 0; i < xs.length; i++) {
            norm[i] = span > 0 ? r.min + (xs[i]-min)*(r.max-r.min)/span : 0.5*(r.min+r.max)
        }
        return norm
    """
    return bokehModels.CustomJSTransform(args=dict(r=r), v_func=vfunc)

In [None]:
def create_JS_update_visibility(sourceTable, dictRenderer, dictGlyph, targetPlot):
    """
    Build the CustomJS callback that shows or hides graph elements according to a table selection.

    The callback reads the selected row indices from the object it is attached to,
    looks up the matching entries of the 'items' column of sourceTable, and then sets
    the `visible` flag of every entry in dictRenderer and dictGlyph to whether its key
    is among the selected items. Finally it emits a change signal on targetPlot.
    """
    jsArgs = dict(s=sourceTable, dR=dictRenderer, dG=dictGlyph, pTo=targetPlot)
    jsCode = """
        var inds = cb_obj.indices ;
        var listSelected = [] ;
        
        for (var i = 0; i < inds.length; i++) {
            listSelected.push( s.data['items'][inds[i]] )
        }
        
        // Note: dict uses "X in Y" notation, while arrays use X.includes(Y) notation.
        for (let k in dR) {
            if (listSelected.includes(k)) {
                dR[k].visible = true
            } else {
                dR[k].visible = false
            }
        }
        for (let k in dG) {
            if (listSelected.includes(k)) {
                dG[k].visible = true
            } else {
                dG[k].visible = false
            }
        }
        pTo.change.emit();
    """
    return bokehModels.CustomJS(args=jsArgs, code=jsCode)

In [None]:
# Build the main network figure (figA): one graph renderer plus one self-edge renderer
# per entry of listG, all added to the same figure and registered by name so that the
# table widget can toggle their visibility.
# SAVE
#output_file('Sample_Application.html',mode='inline',root_dir=None)
output_notebook()

# = = = = = = = = = = = = = = = = = = = = = = = = = = =
# = = General settings.
# = = = = = = = = = = = = = = = = = = = = = = = = = = =
plotWidth=800 ; plotHeight=600

# = = = = = = = = = = = = = = = = = = = = = = = = = = =
# = = Graph
# = = = = = = = = = = = = = = = = = = = = = = = = = = =

# NOTE(review): mapperA and mapperB are created here but not used anywhere else in this cell.
colorsA = np.flip(bokehPalettes.YlOrRd[9])
mapperA = bokehModels.LinearColorMapper(palette=colorsA, low=0, high=1)
colorsB = np.flip(bokehPalettes.Blues[9])
mapperB = bokehModels.LinearColorMapper(palette=colorsB, low=0, high=1)

# = = = Plot Prep
# NOTE(review): plot_width/plot_height are the older figure keywords; newer Bokeh releases
# use width/height -- confirm against the installed Bokeh version.
figA = bokehPlotting.figure(plot_width=plotWidth, plot_height=plotHeight,
              tools=["pan","wheel_zoom", "tap", "reset", "save"],
              title="Overview")
figA.toolbar.active_scroll = figA.select_one(bokehModels.WheelZoomTool)
# This overrides the title="Overview" given to the constructor above.
figA.title.text = "Graph Search Demonstration"

# = = = Plot all graph elements but hide them dynamically based on table selection!
dictGraphMain = {} ; dictGraphSelfEdge = {}

for i in range(len(listG)):
    G   = listG[i] ; key = listNames[i]
    
    # = = = Plot main part of the graph without self edges.
    # posNodes comes from the position-import cell and only exists when fileImportPos is not None.
    Gplot = return_copy_without_self_edges(G)
    rendererGraph = bokehPlotting.from_networkx(Gplot, posNodes, scale=2, center=(0, 0))
    # Node size follows the 'count' attribute, rescaled to the range 10-20.
    rendererGraph.node_renderer.glyph = bokehModels.Circle(size=bokehTransform.transform('count',rescale_linear(10,20)),
                                                           fill_color='node_color'
                                                          )
    # Edge line width follows the 'weight' attribute, rescaled to the range 1-5.
    rendererGraph.edge_renderer.glyph = bokehModels.MultiLine(line_color='black',
                                                              line_alpha=0.8,
                                                              line_width=bokehTransform.transform('weight',rescale_linear(1,5))
                                                             )
    rendererGraph.node_renderer.selection_glyph = bokehModels.Circle(size=20, fill_color='node_color')
    #sourceGraph = rendererGraph.node_renderer.data_source
    
    # = = = Plot self-edge part of the graph.
    sourceSelfEdge = bokehModels.ColumnDataSource(data=dict(x=[], y=[], a1=[], a2=[], weight=[]))
    #glyphSelfEdge = bokehModels.Circle(x="x", y="y", size=30, fill_alpha=0.0,
    #                                     line_color='grey', line_alpha=0.8, line_width=1)
    glyphSelfEdge = bokehModels.AnnularWedge(x="x", y="y", inner_radius=0, outer_radius=20, outer_radius_units='screen',
                                             start_angle="a1", end_angle="a2", fill_alpha=0.0,
                                             line_color='grey',
                                             line_alpha=0.8,
                                             line_width=bokehTransform.transform('weight',rescale_linear(1,5))
                                            )
    # Self-edges are taken from the full graph G (Gplot has them removed) and placed
    # using the node layout of the renderer just created.
    arrange_self_edges(G, sourceSelfEdge, rendererGraph.layout_provider.graph_layout)
    
    
    # = = = Add these renderers to the figure
    rendererSelfEdges = figA.add_glyph(sourceSelfEdge, glyphSelfEdge)
    rendererSelfEdges.visible=True
    figA.renderers.append(rendererGraph)
    rendererGraph.visible=True
    
    dictGraphMain[key]     = rendererGraph
    dictGraphSelfEdge[key] = rendererSelfEdges

# = = = = = = = = = = = = = = = = = = = = = = =
# = = = Additional bells and whistles
# = = = = = = = = = = = = = = = = = = = = = = =

# = = = Colour Bar
# NOTE(review): G here is whatever the loop above left behind, i.e. the last graph of listG;
# the legend therefore reflects that graph's node order only.
colourList = get_node_color_label_map(G)
palette = colourPaletteCat
# Trim whichever of the two lists is longer so that factors and palette have equal length.
if len(colourList) > len(palette):
    colourList = colourList[:len(palette)]
elif len(palette) > len(colourList):
    palette = colourPaletteCat[:len(colourList)]
colourMapper = bokehModels.mappers.CategoricalColorMapper(palette=palette, factors=colourList)
colourBar = bokehModels.ColorBar(name='SegID', color_mapper=colourMapper, label_standoff=12)
figA.add_layout(colourBar, 'right')    

# = = = Hover Tools
# Edge hover covers the edge renderers of all graphs plus all self-edge renderers.
listTemp = [ dictGraphMain[k].edge_renderer for k in dictGraphMain.keys() ] + [ dictGraphSelfEdge[k] for k in dictGraphSelfEdge.keys() ]
edge_hover_tool = bokehModels.HoverTool(tooltips=[("weight", "@weight")], renderers=listTemp )
figA.add_tools(edge_hover_tool)

# Node hover covers the node renderers of all graphs.
listTemp = [ dictGraphMain[k].node_renderer for k in dictGraphMain.keys() ]
node_hover_tool = bokehModels.HoverTool(tooltips=[("index", "@index")], renderers=listTemp)
figA.add_tools(node_hover_tool)
#p.add_tools(bokehModels.HoverTool(tooltips=tooltips, renderers=[rendererA,rendererB]))




In [None]:
# = = = Controls for allele comparison.
# A one-column table listing every graph label in listNames; selecting rows drives
# which graphs are visible in figA.
dataTable = dict(items=listNames)
sourceTable = bokehModels.ColumnDataSource(dataTable)
colsTable = [ bokehModels.TableColumn(field="items", title="Select Allele") ]

tableData = bokehModels.DataTable(source=sourceTable, columns=colsTable, index_position=None,
                                  width=100, sizing_mode='stretch_height')

# = = = Hookup to update visibility
# The callback fires whenever the table's selected row indices change.
tableData.source.selected.js_on_change("indices",
                                       create_JS_update_visibility(sourceTable, dictGraphMain, dictGraphSelfEdge, figA)
                                      )
#graphRenderer.node_renderer.data_source.selected.js_on_change("indices", callbackBig)
#widgetSelect = bokehModels.Select(title="Option:", value="foo", options=["foo", "bar", "baz", "quux"],
#                            sizing_mode='stretch_height')

In [None]:
# Place the selection table to the left of the network figure and display both.
layout=bokehModels.Row(tableData, figA)

show(layout)

In [None]:
# Collect the weight of one chosen edge from every simplified graph and prepare the
# categorical figure (figB) used to compare that weight across alleles.
plotWidth=1200 ; plotHeight=500
edgePair=('ND1','ND2')
sourceEdge = bokehModels.ColumnDataSource(data=dict(x=[], y=[]))
listCategories=[]
for G, key in zip(listG, listNames):
    # Graph labels have the form "<allele>_<window>"; the allele is the category on the x axis.
    xVal = key.split('_')[0]
    sourceEdge.data['x'].append( xVal )
    # Raises KeyError if a graph does not contain the edgePair edge.
    sourceEdge.data['y'].append( G.edges[edgePair]['weight'] )
    # Categories are kept in order of first appearance.
    if xVal not in listCategories:
        listCategories.append( xVal )

figB = bokehPlotting.figure(plot_width=plotWidth, plot_height=plotHeight,
              tools=["pan","wheel_zoom", "tap", "reset", "save"],
              title="Overview",
              x_range=listCategories,
              y_axis_label="%s - %s" % edgePair)
# Activate figB's own wheel-zoom tool. The tool was previously looked up on figA,
# which handed figB a tool that is not part of its toolbar.
figB.toolbar.active_scroll = figB.select_one(bokehModels.WheelZoomTool)
figB.title.text = "Cross graph comparisons"

In [None]:
# Draw a box plot of the collected edge weights, one box per allele category, onto figB.
import pandas as pd

widthBox=0.5
widthWhisker=0.1

df = pd.DataFrame(dict(x=sourceEdge.data['x'],
                       y=sourceEdge.data['y']))

# find the quartiles and IQR for each category
# sort=False keeps the groups in order of first appearance, which is how listCategories
# was built; the glyph calls below rely on the two being aligned position by position.
groups = df.groupby('x', sort=False)
q0 = groups.quantile(q=0.00)
q1 = groups.quantile(q=0.25)
q2 = groups.quantile(q=0.50)
q3 = groups.quantile(q=0.75)
q4 = groups.quantile(q=1.0)
iqr = q3 - q1
upper = q3 + 1.5*iqr
lower = q1 - 1.5*iqr

# assume no outliers, shrink lengths of stems to be no longer than the minimums or maximums
upper.y = [min([x,y]) for (x,y) in zip(list(q4.loc[:,'y']),upper.y)]
lower.y = [max([x,y]) for (x,y) in zip(list(q0.loc[:,'y']),lower.y)]

# stems
figB.segment(listCategories, upper.y, listCategories, q3.y, line_color="grey")
figB.segment(listCategories, lower.y, listCategories, q1.y, line_color="grey")

# boxes
# Two unfilled bars per category, split at the median (q2).
figB.vbar(listCategories, widthBox, q2.y, q3.y, fill_alpha=0.0, line_color="grey")
figB.vbar(listCategories, widthBox, q1.y, q2.y, fill_alpha=0.0, line_color="grey")

# whiskers (almost-0 height rects simpler than segments)
# NOTE(review): the height 0.01 is in data units of the y axis -- confirm it suits the weight scale.
figB.rect(listCategories, lower.y, widthWhisker, 0.01, line_color="grey")
figB.rect(listCategories, upper.y, widthWhisker, 0.01, line_color="grey")

In [None]:
# Overlay the individual data points on the box plot; x positions are jittered
# (normal distribution, width 0.1) within each category so that points do not overlap.
figB.scatter(source=sourceEdge,
             x=bokehTransform.jitter('x', 0.1, mean=0, distribution='normal', range=figB.x_range),
             y='y',
             size=6, fill_color='#BBCCFF', line_color='black'
            )

In [None]:
# Display the cross-graph comparison figure.
show(figB)

In [None]:
#df['y'].__array__()