In [25]:
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
import os
import itertools
import tqdm
import gc

import correlation_data_utilities
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
from IPython.display import display

#import nglview
#import py3dmol

In [4]:
# Folder (relative to the notebook) holding the energy-network CSV files.
dataDir='energyDataFiles'
# Pairwise residue interaction-energy table; one row per residue pair per
# (System, Variant, Rep), as shown by the preview below.
energyData=pd.read_csv(
    filepath_or_buffer='/'.join((dataDir,'energyNetworkDataTable.csv')))
energyData.head()

Unnamed: 0,Resid_1,ResName_1,Chain_1,Seqid_1,Resid_2,ResName_2,Chain_2,Seqid_2,TOTAL.Avg,TOTAL.Std_Dev,TOTAL.Std_Err_of_Mean,Chain_Delta,System,Variant,Rep
0,1,MET,1,1,1,MET,1,1,-24.06473,1.788517,0.177964,0,n14k2,acetyl,rep1
1,1,MET,1,1,2,ASP,1,2,-27.276465,0.659607,0.065633,0,n14k2,acetyl,rep1
2,1,MET,1,1,3,TRP,1,3,-3.058341,0.3312,0.032956,0,n14k2,acetyl,rep1
3,1,MET,1,1,5,THR,1,5,-3.156847,1.469387,0.14621,0,n14k2,acetyl,rep1
4,1,MET,1,1,6,LEU,1,6,-0.124479,0.081879,0.008147,0,n14k2,acetyl,rep1


In [12]:
# Split the pairwise-energy table into one sub-table per
# (System, Variant, Rep) combination; each sub-table is treated as one network.
networkGroups=energyData.groupby(['System','Variant','Rep'])
nRes=1356
nChains=6
# Integer division keeps the residue offsets integral; the guard catches a
# residue count that cannot be split evenly across the chains.
assert nRes%nChains==0,"nRes must be a multiple of nChains"
resPerChain=nRes//nChains
# 0-based index of the first residue of every chain
# (assumes equal-length chains stored back to back -- TODO confirm).
chainOffsets=resPerChain*np.arange(nChains)

# Source / target residues are given as per-chain sequence ids and are
# replicated onto every chain as 0-based matrix indices, ordered chain by chain.
sourceSeqids=[14]
sourceResids=(chainOffsets[:,None]+np.array(sourceSeqids)-1).ravel().astype(int)
targetSeqids=[47]
targetResids=(chainOffsets[:,None]+np.array(targetSeqids)-1).ravel().astype(int)

networkDataTables=[]
with tqdm.tqdm_notebook(networkGroups,desc='Analyzing Network Data') as groupIter:
    # A single inner bar is reused (reset) for every group.
    with tqdm.tqdm_notebook(desc='computing flow betweenness') as pbar:
        for groupName,groupData in groupIter:
            groupLabel='.'.join(groupName)
            groupIter.set_description_str(groupLabel)

            # Resid_* are shifted by one to serve as 0-based matrix indices.
            rowInds=groupData['Resid_1'].to_numpy()-1
            colInds=groupData['Resid_2'].to_numpy()-1
            # Dense nRes x nRes matrix of |TOTAL.Avg|; pairs absent from the
            # table stay 0 and repeated pairs are summed by the COO conversion.
            netMat=sp.sparse.coo_matrix(
                    (groupData['TOTAL.Avg'].abs().to_numpy(),(rowInds,colInds)),
                    shape=(nRes,nRes)).todense()

            pbar.reset()
            pbar.set_description_str(groupLabel)
            # Presumably the flow betweenness of every residue pair for flow
            # between the source and target residues -- see
            # correlation_data_utilities.getBtwMat for the exact definition.
            btwMat=correlation_data_utilities.getBtwMat(
                mat=netMat,sources=sourceResids,targets=targetResids,
                verbose=False,verboseLevel=0,
                useProgressBar=True,pbarFun=pbar)

            # Look up all pair values with one vectorised index operation
            # instead of a row-wise DataFrame.apply (slow, and dependent on
            # deprecated positional Series indexing). Converting to a plain
            # ndarray first makes the fancy-index result 1-D even if getBtwMat
            # returns an np.matrix or a sparse matrix.
            btwValues=(btwMat.toarray() if sp.sparse.issparse(btwMat)
                       else np.asarray(btwMat))
            # assign() returns a new frame, so groupData itself is untouched.
            networkDataTables.append(
                groupData.assign(Betweenness=btwValues[rowInds,colInds]))
            gc.collect()
# One combined table: the input rows plus a 'Betweenness' column.
networkDataTable=pd.concat(networkDataTables)
networkDataTable.head()

HBox(children=(IntProgress(value=0, description='Analyzing Network Data', max=20, style=ProgressStyle(descript…

HBox(children=(IntProgress(value=1, bar_style='info', description='computing flow betweenness', max=1, style=P…

Unnamed: 0,Resid_1,ResName_1,Chain_1,Seqid_1,Resid_2,ResName_2,Chain_2,Seqid_2,TOTAL.Avg,TOTAL.Std_Dev,TOTAL.Std_Err_of_Mean,Chain_Delta,System,Variant,Rep,Betweenness
0,1,MET,1,1,1,MET,1,1,-24.06473,1.788517,0.177964,0,n14k2,acetyl,rep1,0.0
1,1,MET,1,1,2,ASP,1,2,-27.276465,0.659607,0.065633,0,n14k2,acetyl,rep1,0.036184
2,1,MET,1,1,3,TRP,1,3,-3.058341,0.3312,0.032956,0,n14k2,acetyl,rep1,0.002489
3,1,MET,1,1,5,THR,1,5,-3.156847,1.469387,0.14621,0,n14k2,acetyl,rep1,0.004992
4,1,MET,1,1,6,LEU,1,6,-0.124479,0.081879,0.008147,0,n14k2,acetyl,rep1,0.000264


In [26]:
# Save the betweenness-annotated table (without the row index) alongside the
# input data so it can be read back later.
networkDataTable.to_csv(
    path_or_buf='/'.join((dataDir,'interactionNetworkData.csv')),
    index=False)

In [27]:
# Reload the betweenness-annotated table from disk so the plotting cell can be
# used without recomputing the betweenness values.
# The frame is bound to `networkDataTable` (singular): that is the name
# show_btw_plot filters on. Binding it to `networkDataTables` would leave the
# plot without its data on a fresh kernel and would overwrite the list of
# per-group tables with a DataFrame.
dataDir='energyDataFiles'
networkDataTable=pd.read_csv('/'.join([dataDir,'interactionNetworkData.csv']))
networkDataTable.head()

Unnamed: 0,Resid_1,ResName_1,Chain_1,Seqid_1,Resid_2,ResName_2,Chain_2,Seqid_2,TOTAL.Avg,TOTAL.Std_Dev,TOTAL.Std_Err_of_Mean,Chain_Delta,System,Variant,Rep,Betweenness
0,1,MET,1,1,1,MET,1,1,-24.06473,1.788517,0.177964,0,n14k2,acetyl,rep1,0.0
1,1,MET,1,1,2,ASP,1,2,-27.276465,0.659607,0.065633,0,n14k2,acetyl,rep1,0.036184
2,1,MET,1,1,3,TRP,1,3,-3.058341,0.3312,0.032956,0,n14k2,acetyl,rep1,0.002489
3,1,MET,1,1,5,THR,1,5,-3.156847,1.469387,0.14621,0,n14k2,acetyl,rep1,0.004992
4,1,MET,1,1,6,LEU,1,6,-0.124479,0.081879,0.008147,0,n14k2,acetyl,rep1,0.000264


In [24]:
@interact_manual
def show_btw_plot(system=energyData['System'].unique(),
                  variant=energyData['Variant'].unique(),
                  rep=energyData['Rep'].unique(),
                  vRange=widgets.FloatRangeSlider(
                      min=0,max=.5,value=(0,.005),readout_format='.1e')):
    """Show the Betweenness heat map for one (system, variant, rep) selection.

    The parameter defaults are turned into widgets by ``interact_manual``:
    one selector per key column of ``energyData`` and a range slider.

    Parameters
    ----------
    system, variant, rep : str
        Values matched against the 'System', 'Variant' and 'Rep' columns of
        ``networkDataTable``.
    vRange : tuple of float
        (lower, upper) pair forwarded as ``vmin`` / ``vmax`` to the heat map.

    Prints a message instead of plotting when no rows match the selection.
    """
    lowerBound,upperBound=vRange
    isSelected=(
        (networkDataTable['System']==system) &
        (networkDataTable['Variant']==variant) &
        (networkDataTable['Rep']==rep))
    selectedRows=networkDataTable[isSelected].copy()

    # Guard clause: nothing to draw for this combination.
    if len(selectedRows)==0:
        print("No data present for %s"%('.'.join([system,variant,rep])))
        return

    # 256-step plasma palette as hex colour strings.
    plasmaHex=sns.color_palette("plasma", n_colors=256).as_hex()
    heatMap=correlation_data_utilities.bokeh_dataTable_heatMap(
        selectedRows,Xcol='Resid_1',Ycol='Resid_2',dataCol='Betweenness',
        width=640,height=640,
        rectheight=1,rectwidth=1,
        colorMap=plasmaHex,
        title="Residue 14-47 Flow Betweenness",
        xlabel=None,
        ylabel=None,
        axisFontSize="14pt",
        vmin=lowerBound,
        vmax=upperBound)
    display(heatMap)

interactive(children=(Dropdown(description='system', options=('n14k2', 'n14y2', 'wt2'), value='n14k2'), Dropdo…