In [213]:
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
import scipy as sp
from scipy import stats

import ipywidgets as widgets
from ipywidgets import interact, interact_manual

import os
import sys
import gc
import copy
import glob

import tqdm
import itertools

import pymc

In [2]:
baseDir="./"

In [312]:
cg_occupancy_data_files=glob.glob(baseDir+"Coarse_Grain_Occupancy_Data.chunk*")
print "first cg data frame: '%s'"%np.sort(cg_occupancy_data_files)[0]

first cg data frame: './Coarse_Grain_Occupancy_Data.chunk.aa'


In [313]:
cg_frames=[]
for iFile,cg_file in tqdm.tqdm_notebook(enumerate(np.sort(cg_occupancy_data_files))):
    if iFile==0:
        cg_frames.append(pd.read_csv(cg_file))
        colNames=cg_frames[0].columns
    else:
        cg_frames.append(pd.read_csv(cg_file,names=colNames))
cg_occupancy_data=pd.concat(cg_frames)
cg_frames=[]
gc.collect()
aa_occupancy_data=pd.read_csv(baseDir+"All_Atom_Occupancy_Data.csv")

print cg_occupancy_data.head()
print aa_occupancy_data.head()

HBox(children=(IntProgress(value=1, bar_style=u'info', max=1), HTML(value=u'')))


   ResID  SeqID  Frame    Time  Occupancy
0   4236   2528      0     0.0          0
1   4236   2528      1  1000.0          0
2   4236   2528      2  2000.0          0
3   4236   2528      3  3000.0          0
4   4236   2528      4  4000.0          0
   ResID  SeqID  Frame  Time  Occupancy
0   4236   2528      0   0.0          0
1   4236   2528      1   1.0          0
2   4236   2528      2   2.0          0
3   4236   2528      3   3.0          0
4   4236   2528      4   4.0          0


Now we can extract residence times from this occupancy by first finding
all 'runs' within the occupancy series of each residue.

This can be accomplished using itertools.groupby to obtain the lengths of each continguous interval where occupancy was non-zero. Since these are discrete integer values, we can easily use the function 'unique' to bin them into a histogram like form by setting the 'return_counts' option to 'True'

We can then plot the resulting distribution...
It seems to look quite exponential like, so it would make sense to try and fit either a geometric (if we think of each frame like an individual 'trial') or an exponential (if want to think of each run's length as a 'wait time')

The exponential distribution with a mean (or characteristic length, $\lambda$) is given by:
$$p(x,\lambda)=\frac{e^{-x/\lambda}}{\lambda}$$


To fit an exponential distribution to this data we first compute the characteristic length ($\gamma$) as the mean ($\bar{X}$) of the observed residence times ($X_i$)

It is possible, however, that the distribution is not perfectly exponential. One common alternative would be to use a gamma distribution instead.

For the gamma distribution, we can use closed form estimators based on a generalized gamma distribution:

$$\theta=\frac{1}{N^2}*(N

In [314]:
def extract_runs(x):
    return [len(list(gg)) for kk,gg in itertools.groupby(x, bool) if kk]

def extract_resDist(x):
    return(np.unique(extract_runs(x),return_counts=True))

def bin_runs(x,binWidth=1.000,frameRate=1.,center='right'):
    init_dist=extract_resDist(x)
    x_dist=(init_dist[0]*frameRate,init_dist[1])
    binMax=np.max(x_dist[0])
    nBins=np.ceil(binMax/binWidth)+1
    hbins=np.arange(nBins)*binWidth
    temp_dist=np.histogram(x_dist[0],weights=x_dist[1],bins=hbins)
    outDat=[temp_dist[1],temp_dist[0]]
    if center=='midpoint':
        outDat[0]=(temp_dist[1][1:]+temp_dist[1][:-1])/2.
    elif center=="left":
        outDat[0]=temp_dist[1][:-1]
    elif center=="right":
        outDat[0]=temp_dist[1][1:]
    return(outDat)

def expDist(x,l):
    return np.exp(-x/l)/l

def frequencyDistribution_mle_exp_params(bin_dist,bias_correction=True):
    Nv=np.sum(bin_dist[1])
    if bias_correction & (Nv>2):
        Nv=Nv-2
    return (np.sum(bin_dist[0]*bin_dist[1])/(1.*Nv))

def beta_dist(x,a,b):
    return (x**(a-1.)*(1.-x)**(b-1.))/\
        (sp.special.gamma(a)*sp.special.gamma(b)/sp.special.gamma(a+b))
def gamma_dist(x,t,k):
    return 1/(sp.special.gamma(k)*(t**k))*x**(k-1.)*np.exp(-x/t)

def frequencyDistribution_mle_gamma_params(bin_dist,bias_correction=True):
    xi,fi=bin_dist
    Nv=np.sum(fi)
    sum_f=np.sum(fi*xi)
    sum_lnf=np.sum(fi*np.log(xi))
    sum_flnf=np.sum(fi*xi*np.log(xi))
    k_est=(Nv*sum_f)/(Nv*sum_flnf-sum_lnf*sum_f)
    t_est=1./(Nv**2)*(Nv*sum_flnf-sum_lnf*sum_f)
    if bias_correction:
        t_est=Nv*t_est/(Nv-1)
        k_est=k_est-1./Nv*(3.*k_est-2./3.*(k_est/(1+k_est))-4./5.*(k_est/(1+k_est)**2))
    return [t_est,k_est]

In [315]:
@interact_manual
def fit_aa_res(tempResID=aa_occupancy_data[aa_occupancy_data.Occupancy>0].ResID.sort_values().unique(),
               bwidth=(1.,10.),binCenter=["midpoint","right"]):
    occData=aa_occupancy_data[aa_occupancy_data.ResID==tempResID].Occupancy
    #bwidth=10.0
    tempDist=bin_runs(occData,binWidth=bwidth,frameRate=.120,center=binCenter)
    fig,axs=plt.subplots(1,2)
    fig.set_figheight(9)
    fig.set_figwidth(12)
    ax=axs.flat[0]
    ax.bar(tempDist[0],1.*tempDist[1]/np.sum(tempDist[1]),width=bwidth,alpha=.5)
    #plt.show()
    tempGammaParms=frequencyDistribution_mle_gamma_params(tempDist,bias_correction=True)
    expLambda=np.sum(1.*tempDist[0]*tempDist[1])/np.sum(tempDist[1])
    ax.plot(tempDist[0],gamma_dist(tempDist[0],tempGammaParms[0],tempGammaParms[1]),
                label='"Gamma:\n t=%.2f, k=%.2f;\n Mean=%.2f"'%(
                    tempGammaParms[0],tempGammaParms[1],tempGammaParms[0]*tempGammaParms[1]*1.))
    ax.plot(tempDist[0],
                expDist(tempDist[0],
                        expLambda),
                label='"Exp:\n l=%.2f"'%expLambda)
    ax.legend()
    ax.set_title("All Atom (N = %g)"%np.sum(tempDist[1]))
    print "aa RMSE expDist:",
    print "%.4f"%np.sqrt(
        np.sum(
            (tempDist[1]/np.sum(tempDist[1])-\
            expDist(tempDist[0],expLambda))**2)/\
        len(tempDist[1]))
    print "aa RMSE gammaDist:",
    print "%.4f"%np.sqrt(
        np.sum(
            (tempDist[1]/np.sum(tempDist[1])-\
            gamma_dist(tempDist[0],tempGammaParms[0],tempGammaParms[1]))**2)/\
        len(tempDist[1]))
    
    occData=cg_occupancy_data[cg_occupancy_data.ResID==tempResID].Occupancy
    tempDist=bin_runs(occData,binWidth=bwidth,frameRate=1.0,center=binCenter)
    ax=axs.flat[1]
    ax.bar(tempDist[0],1.*tempDist[1]/np.sum(tempDist[1]),width=bwidth,alpha=.5)
    #plt.show()
    tempGammaParms=frequencyDistribution_mle_gamma_params(tempDist,bias_correction=True)
    expLambda=frequencyDistribution_mle_exp_params(tempDist,bias_correction=True)
    ax.plot(tempDist[0],gamma_dist(tempDist[0],tempGammaParms[0],tempGammaParms[1]),
                label='"Gamma:\n t=%.2f, k=%.2f;\n Mean=%.2f"'%(
                    tempGammaParms[0],tempGammaParms[1],tempGammaParms[0]*tempGammaParms[1]*1.))
    ax.plot(tempDist[0],
                expDist(tempDist[0],
                        expLambda),
                label='"Exp:\n l=%.2f"'%expLambda)
    ax.legend()
    ax.set_title('Coarse Grain (N = %g)'%np.sum(tempDist[1]))
    print "cg RMSE expDist:",
    print "%.4f"%np.sqrt(
        np.sum(
            (tempDist[1]/np.sum(tempDist[1])-\
            expDist(tempDist[0],expLambda))**2)/\
        len(tempDist[1]))
    print "cg RMSE gammaDist:",
    print "%.4f"%np.sqrt(
        np.sum(
            (tempDist[1]/np.sum(tempDist[1])-\
            gamma_dist(tempDist[0],tempGammaParms[0],tempGammaParms[1]))**2)/\
        len(tempDist[1]))
    
    plt.show()

aW50ZXJhY3RpdmUoY2hpbGRyZW49KERyb3Bkb3duKGRlc2NyaXB0aW9uPXUndGVtcFJlc0lEJywgb3B0aW9ucz0oMSwgMzAsIDYzLCA2NSwgMTYxLCAxNjIsIDE2NywgMTY4LCAyNDIsIDI0MyzigKY=


In [316]:
aaFitDat=aa_occupancy_data #[aa_occupancy_data.ResID.isin([3086,3256])]
aaFitFrame=aaFitDat.groupby(['ResID','SeqID']).agg(
    {"Occupancy": {
         "Total_Occupancy":lambda x: .12*np.sum(x),
         "N":lambda x: len(extract_runs(x)),
         "Max_ResTime":lambda x: np.max(extract_runs(x))*.12 if np.sum(x) > 0 else 0,
         "GammaDist_Params":lambda x: list(list(frequencyDistribution_mle_gamma_params(
         bin_runs(x,frameRate=.12,binWidth=binwidth),bias_correction=True))) if np.sum(x)>0 else [np.nan,np.nan],
         "ExpDist_Mean":lambda x: frequencyDistribution_mle_exp_params(
            bin_runs(x,frameRate=.12,binWidth=binwidth)) if np.sum(x)>0 else np.nan}
    })
aaFitFrame.head()



Unnamed: 0_level_0,Unnamed: 1_level_0,Occupancy,Occupancy,Occupancy,Occupancy,Occupancy
Unnamed: 0_level_1,Unnamed: 1_level_1,ExpDist_Mean,N,GammaDist_Params,Max_ResTime,Total_Occupancy
ResID,SeqID,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1,782,7.2,6,"[0.0, nan]",1.56,2.64
15,796,,0,"[nan, nan]",0.0,0.0
21,802,,0,"[nan, nan]",0.0,0.0
22,803,,0,"[nan, nan]",0.0,0.0
30,811,4.8,2,"[0.0, nan]",0.12,0.24


In [317]:
binwidth=4.8

aaFitDat=aa_occupancy_data #[aa_occupancy_data.ResID.isin([3086,3256])]
aaFitFrame=aaFitDat.groupby(['ResID','SeqID']).agg(
    {"Occupancy": {
         "Total_Occupancy":lambda x: .12*np.sum(x),
         "N":lambda x: len(extract_runs(x)),
         "Max_ResTime":lambda x: np.max(extract_runs(x))*.12 if np.sum(x) > 0 else 0,
         "GammaDist_Params":lambda x: list(list(frequencyDistribution_mle_gamma_params(
         bin_runs(x,frameRate=.12,binWidth=binwidth),bias_correction=True))) if np.sum(x)>0 else [np.nan,np.nan],
         "ExpDist_Mean":lambda x: frequencyDistribution_mle_exp_params(
            bin_runs(x,frameRate=.12,binWidth=binwidth)) if np.sum(x)>0 else np.nan}
    })
aaFitFrame.columns=aaFitFrame.columns.map(lambda x: x[1])
aaFitFrame=aaFitFrame.reset_index()
aaFitFrame['GammaDist_Mean']=aaFitFrame.GammaDist_Params.map(np.product)
aaFitFrame['GammaDist_k']=aaFitFrame.GammaDist_Params.map(lambda x: x[1])
aaFitFrame=aaFitFrame.drop(columns='GammaDist_Params')
aaFitFrame['SimType']='All_Atom'
aaFitFrame=aaFitFrame[["SimType","ResID","SeqID","Total_Occupancy","N","Max_ResTime",
                       "ExpDist_Mean","GammaDist_Mean","GammaDist_k"]]
#aaFitFrame=aaFitFrame.dropna()
print aaFitFrame.head()

cgFitDat=cg_occupancy_data #[aa_occupancy_data.ResID.isin([3086,3256])]
cgFitFrame=cgFitDat.groupby(['ResID','SeqID']).agg(
    {"Occupancy": {
        "Total_Occupancy":np.sum,
        "N":lambda x: len(extract_runs(x)),
        "Max_ResTime":lambda x: np.max(extract_runs(x)) if np.sum(x) > 0 else 0,
        "GammaDist_Params":lambda x: list(list(frequencyDistribution_mle_gamma_params(
        bin_runs(x,frameRate=1.,binWidth=binwidth),bias_correction=True))) if np.sum(x)>0 else [np.nan,np.nan],
        "ExpDist_Mean":lambda x: frequencyDistribution_mle_exp_params(
            bin_runs(x,frameRate=1.,binWidth=binwidth)) if np.sum(x)>0 else np.nan}
    })
cgFitFrame.columns=cgFitFrame.columns.map(lambda x: x[1])
cgFitFrame=cgFitFrame.reset_index()
cgFitFrame['GammaDist_Mean']=cgFitFrame.GammaDist_Params.map(np.product)
cgFitFrame['GammaDist_k']=cgFitFrame.GammaDist_Params.map(lambda x: x[1])
cgFitFrame=cgFitFrame.drop(columns='GammaDist_Params')
cgFitFrame['SimType']='Coarse_Grain'
cgFitFrame=cgFitFrame[["SimType","ResID","SeqID","Total_Occupancy","N","Max_ResTime",
                       "ExpDist_Mean","GammaDist_Mean","GammaDist_k"]]
#cgFitFrame=cgFitFrame.dropna()
print cgFitFrame.head()



    SimType  ResID  SeqID  Total_Occupancy  N  Max_ResTime  ExpDist_Mean  \
0  All_Atom      1    782             2.64  6         1.56           7.2   
1  All_Atom     15    796             0.00  0         0.00           NaN   
2  All_Atom     21    802             0.00  0         0.00           NaN   
3  All_Atom     22    803             0.00  0         0.00           NaN   
4  All_Atom     30    811             0.24  2         0.12           4.8   

   GammaDist_Mean  GammaDist_k  
0             NaN          NaN  
1             NaN          NaN  
2             NaN          NaN  
3             NaN          NaN  
4             NaN          NaN  
        SimType  ResID  SeqID  Total_Occupancy    N  Max_ResTime  \
0  Coarse_Grain      1    782             5212  256          141   
1  Coarse_Grain     15    796             2012  309          423   
2  Coarse_Grain     21    802                0    0            0   
3  Coarse_Grain     22    803                0    0            0   
4  Co

In [318]:
jointFitFrame=pd.concat([aaFitFrame,cgFitFrame])
jointFitFrame.to_csv(baseDir+"joint_distribution_fit_frame.csv",index=False)
jointFitFrame.head()

Unnamed: 0,SimType,ResID,SeqID,Total_Occupancy,N,Max_ResTime,ExpDist_Mean,GammaDist_Mean,GammaDist_k
0,All_Atom,1,782,2.64,6,1.56,7.2,,
1,All_Atom,15,796,0.0,0,0.0,,,
2,All_Atom,21,802,0.0,0,0.0,,,
3,All_Atom,22,803,0.0,0,0.0,,,
4,All_Atom,30,811,0.24,2,0.12,4.8,,


In [319]:
jointFitMelt=jointFitFrame.melt(id_vars=["SimType","ResID","SeqID"],var_name="Fit_Param")
print jointFitMelt.head()

jointFitWide=jointFitMelt
jointFitWide["Measurement"]=jointFitWide.SimType+"."+jointFitWide.Fit_Param
jointFitWide=jointFitWide.drop(columns=["SimType","Fit_Param"])
jointFitWide=jointFitWide.reset_index()
jointFitWide=pd.pivot_table(index=["ResID","SeqID"],columns="Measurement",values="value",data=jointFitWide)
#jointFitWide.columns=jointFitWide.columns.map(lambda x: x[1])
jointFitWide=jointFitWide.reset_index()
print jointFitWide.columns
jointFitWide.to_csv(baseDir+"joint_Fit_Data_wide.csv",index=False)
jointFitWide.head()

    SimType  ResID  SeqID        Fit_Param  value
0  All_Atom      1    782  Total_Occupancy   2.64
1  All_Atom     15    796  Total_Occupancy   0.00
2  All_Atom     21    802  Total_Occupancy   0.00
3  All_Atom     22    803  Total_Occupancy   0.00
4  All_Atom     30    811  Total_Occupancy   0.24
Index([u'ResID', u'SeqID', u'All_Atom.ExpDist_Mean',
       u'All_Atom.GammaDist_Mean', u'All_Atom.GammaDist_k',
       u'All_Atom.Max_ResTime', u'All_Atom.N', u'All_Atom.Total_Occupancy',
       u'Coarse_Grain.ExpDist_Mean', u'Coarse_Grain.GammaDist_Mean',
       u'Coarse_Grain.GammaDist_k', u'Coarse_Grain.Max_ResTime',
       u'Coarse_Grain.N', u'Coarse_Grain.Total_Occupancy'],
      dtype='object', name=u'Measurement')


Measurement,ResID,SeqID,All_Atom.ExpDist_Mean,All_Atom.GammaDist_Mean,All_Atom.GammaDist_k,All_Atom.Max_ResTime,All_Atom.N,All_Atom.Total_Occupancy,Coarse_Grain.ExpDist_Mean,Coarse_Grain.GammaDist_Mean,Coarse_Grain.GammaDist_k,Coarse_Grain.Max_ResTime,Coarse_Grain.N,Coarse_Grain.Total_Occupancy
0,1,782,7.2,,,1.56,6.0,2.64,23.187402,22.868932,1.155617,141.0,256.0,5212.0
1,15,796,,,,0.0,0.0,0.0,9.771987,9.667189,0.672089,423.0,309.0,2012.0
2,21,802,,,,0.0,0.0,0.0,,,,0.0,0.0,0.0
3,22,803,,,,0.0,0.0,0.0,,,,0.0,0.0,0.0
4,30,811,4.8,,,0.12,2.0,0.24,,,,0.0,0.0,0.0


In [320]:
jointFitWide=pd.read_csv(baseDir+"joint_Fit_Data_Wide.csv")
jointFitWide.head()

Unnamed: 0,ResID,SeqID,All_Atom.ExpDist_Mean,All_Atom.GammaDist_Mean,All_Atom.GammaDist_k,All_Atom.Max_ResTime,All_Atom.N,All_Atom.Total_Occupancy,Coarse_Grain.ExpDist_Mean,Coarse_Grain.GammaDist_Mean,Coarse_Grain.GammaDist_k,Coarse_Grain.Max_ResTime,Coarse_Grain.N,Coarse_Grain.Total_Occupancy
0,1,782,7.2,,,1.56,6.0,2.64,23.187402,22.868932,1.155617,141.0,256.0,5212.0
1,15,796,,,,0.0,0.0,0.0,9.771987,9.667189,0.672089,423.0,309.0,2012.0
2,21,802,,,,0.0,0.0,0.0,,,,0.0,0.0,0.0
3,22,803,,,,0.0,0.0,0.0,,,,0.0,0.0,0.0
4,30,811,4.8,,,0.12,2.0,0.24,,,,0.0,0.0,0.0


In [321]:
sasa_data=pd.read_csv(baseDir+"basicResidueSASA.csv")
sasa_data.head()

Unnamed: 0,System,Frame,ARG_1,ARG_15,ARG_21,ARG_22,HSD_27,LYS_30,LYS_42,ARG_63,...,HSD_4198,ARG_4210,LYS_4213,ARG_4222,ARG_4225,LYS_4236,ARG_4242,LYS_4249,ARG_4252,ARG_4254
0,Piezo_PIP2,0,87.971682,72.99816,80.786466,31.312725,67.773444,20.623376,55.209823,135.463408,...,43.275383,69.884503,24.94398,4.626739,88.281732,16.574375,15.564438,38.770691,96.299466,130.583524
1,Piezo_PIP2,1,76.917992,102.304355,79.381224,13.995005,86.148243,33.957993,55.538281,128.435914,...,77.56103,116.671583,39.405101,36.539325,56.898822,23.24208,33.811875,34.774065,88.537691,131.083016
2,Piezo_PIP2,2,92.678538,90.994392,68.379834,15.453099,90.343862,27.959345,53.981236,125.491502,...,52.222729,108.73483,46.284419,32.924167,102.064085,55.765358,36.739432,49.450528,125.012244,130.259988
3,Piezo_PIP2,3,88.609785,87.762004,72.502762,1.874848,84.17082,14.094714,57.859521,128.619427,...,57.925787,78.240005,41.645692,24.706268,97.861685,57.142842,24.40906,61.42445,105.862211,120.37742
4,Piezo_PIP2,4,104.101779,87.028277,98.756076,4.509094,88.035775,19.995091,51.474539,130.784876,...,57.793013,76.84782,39.216604,34.75695,88.014204,64.173321,45.711059,61.53414,101.307814,128.690753


In [322]:
sasa_data_long=sasa_data.melt(id_vars=['System','Frame'],var_name='Residue',value_name='SASA')
sasa_data_long['ResID']=sasa_data_long.Residue.map(lambda x: x.split('_')[-1])
sasa_data_long['ResName']=sasa_data_long.Residue.map(lambda x: x.split('_')[0])
sasa_data_long['SimType']='All_Atom'
sasa_data_long=sasa_data_long[['System','SimType','ResID','ResName','Frame','SASA']]
sasa_data_long.head()

Unnamed: 0,System,SimType,ResID,ResName,Frame,SASA
0,Piezo_PIP2,All_Atom,1,ARG,0,87.971682
1,Piezo_PIP2,All_Atom,1,ARG,1,76.917992
2,Piezo_PIP2,All_Atom,1,ARG,2,92.678538
3,Piezo_PIP2,All_Atom,1,ARG,3,88.609785
4,Piezo_PIP2,All_Atom,1,ARG,4,104.101779


In [323]:
sasa_summary=sasa_data_long.groupby(['System','SimType','ResID','ResName']).agg({'SASA':np.mean})
sasa_summary=sasa_summary.reset_index()
sasa_summary.ResID=pd.Series(sasa_summary.ResID,dtype=int)
sasa_summary.head()

Unnamed: 0,System,SimType,ResID,ResName,SASA
0,Piezo_PIP2,All_Atom,1,ARG,125.327735
1,Piezo_PIP2,All_Atom,1007,ARG,53.5128
2,Piezo_PIP2,All_Atom,1038,LYS,42.051687
3,Piezo_PIP2,All_Atom,1041,ARG,95.207055
4,Piezo_PIP2,All_Atom,1045,LYS,46.957189


In [324]:
sasa_corr=jointFitWide.set_index("ResID").join(
        other=sasa_summary.drop(columns=["System","SimType"]).set_index("ResID")
    ).reset_index()
sasa_corr.to_csv(baseDir+"SASA_correlation_dataFrame.csv",index=False)
sasa_corr.head()

Unnamed: 0,ResID,SeqID,All_Atom.ExpDist_Mean,All_Atom.GammaDist_Mean,All_Atom.GammaDist_k,All_Atom.Max_ResTime,All_Atom.N,All_Atom.Total_Occupancy,Coarse_Grain.ExpDist_Mean,Coarse_Grain.GammaDist_Mean,Coarse_Grain.GammaDist_k,Coarse_Grain.Max_ResTime,Coarse_Grain.N,Coarse_Grain.Total_Occupancy,ResName,SASA
0,1,782,7.2,,,1.56,6.0,2.64,23.187402,22.868932,1.155617,141.0,256.0,5212.0,ARG,125.327735
1,15,796,,,,0.0,0.0,0.0,9.771987,9.667189,0.672089,423.0,309.0,2012.0,ARG,113.897975
2,21,802,,,,0.0,0.0,0.0,,,,0.0,0.0,0.0,ARG,87.305075
3,22,803,,,,0.0,0.0,0.0,,,,0.0,0.0,0.0,ARG,5.58638
4,30,811,4.8,,,0.12,2.0,0.24,,,,0.0,0.0,0.0,LYS,27.970245


In [325]:
sasa_corr=pd.read_csv(baseDir+"SASA_correlation_dataFrame.csv")

In [326]:
import bokeh
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource, CDSView, GroupFilter, HoverTool
from bokeh.plotting import figure, show
from bokeh.transform import factor_cmap
from bokeh.palettes import Spectral6

In [327]:
@interact_manual
def plot_columns(xCol=sasa_corr.drop(columns=["ResName","ResID","SeqID"]).columns,
                 yCol=sasa_corr.drop(columns=["ResName","ResID","SeqID"]).columns):
    bokeh.io.output_notebook()
    plotData=sasa_corr[[xCol,yCol,"ResName","ResID","SeqID"]].dropna() #,"All_Atom.N",'Coarse_Grain.N']]
    plotData=plotData[(plotData[xCol]>0) & (plotData[yCol]>0)]
    source=ColumnDataSource(plotData)
    view1=CDSView(source=source)
    
    ResNames=list(plotData.ResName.unique())
    
    plot_size_and_tools={'plot_height':640,
                         'plot_width':640,
                         'tools':['pan','wheel_zoom',
                                  'undo','redo','reset','save',
                                  'crosshair','hover']}
    
    p1=figure(**plot_size_and_tools)
    p1.circle(x=xCol,y=yCol,
              source=source,
              color=factor_cmap('ResName',palette=Spectral6,factors=ResNames))
    p1.legend.orientation="vertical"
    hover = p1.select(dict(type=HoverTool))
    hover.tooltips = [(colName,"@{"+colName+"}") for colName in plotData.columns]
    show(p1)

aW50ZXJhY3RpdmUoY2hpbGRyZW49KERyb3Bkb3duKGRlc2NyaXB0aW9uPXUneENvbCcsIG9wdGlvbnM9KCdBbGxfQXRvbS5FeHBEaXN0X01lYW4nLCAnQWxsX0F0b20uR2FtbWFEaXN0X01lYW7igKY=
