In [5]:
#!git clone https://github.com/wesleymsmith/Piezo_PIP2_binding_analysis.git
#!pip install bokeh
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
import scipy as sp
from scipy import stats

import ipywidgets as widgets
from ipywidgets import interact, interact_manual

import os
import sys
import gc
import copy
import glob

import tqdm
import itertools

import bokeh
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource, CDSView, GroupFilter, HoverTool
from bokeh.plotting import figure, show
from bokeh.transform import factor_cmap
from bokeh.palettes import Spectral6

# Visualization

If you only want to visualize the final results, run the loading cells at the top of the
notebook to import the required packages, make sure the `baseDir` variable points to the
directory that contains `SASA_correlation_dataFrame.csv`, and then start at the cell below.

In [6]:
# Directory that is searched for the correlation table.
baseDir = "."
# Read the table from <baseDir>/SASA_correlation_dataFrame.csv into a DataFrame.
sasa_corr = pd.read_csv("/".join((baseDir, "SASA_correlation_dataFrame.csv")))
# Show the first rows as the cell output.
sasa_corr.head()

Unnamed: 0,ResID,SeqID,All_Atom.ExpDist_Mean,All_Atom.ExpDist_RMSE,All_Atom.GammaDist_Mean,All_Atom.GammaDist_RMSE,All_Atom.GammaDist_k,All_Atom.Max_ResTime,All_Atom.N,All_Atom.Total_Occupancy,Coarse_Grain.ExpDist_Mean,Coarse_Grain.ExpDist_RMSE,Coarse_Grain.GammaDist_Mean,Coarse_Grain.GammaDist_RMSE,Coarse_Grain.GammaDist_k,Coarse_Grain.Max_ResTime,Coarse_Grain.N,Coarse_Grain.Total_Occupancy,ResName,SASA
0,1,782,9.28,0.032096,8.047528,0.040372,1.564721,32.0,27.0,171.0,23.03937,0.011198,22.724661,0.011517,1.094233,141.0,256.0,5212.0,ARG,227.088237
1,15,796,10.111111,0.026742,9.169857,0.031632,1.292284,44.0,38.0,268.0,9.094463,0.008829,8.998251,0.007495,0.599033,423.0,309.0,2012.0,ARG,269.308386
2,21,802,21.488372,0.010855,19.911491,0.010335,0.666266,159.0,45.0,810.0,,,,,,0.0,0.0,0.0,ARG,278.032111
3,22,803,78.333333,0.003341,61.328587,0.003401,0.348635,539.0,14.0,903.0,,,,,,0.0,0.0,0.0,ARG,684.723468
4,30,811,9.538462,0.024822,7.386159,0.026995,0.892159,50.0,15.0,87.0,,,,,,0.0,0.0,0.0,LYS,130.928857


In [7]:
@interact_manual
def plot_columns(xCol=sasa_corr.drop(columns=["ResName","ResID","SeqID"]).columns,
                 yCol=sasa_corr.drop(columns=["ResName","ResID","SeqID"]).columns,
                 RMSEmax=(0,1,.001),
                 Nmin=(1,100,1)):
    """Scatter one column of ``sasa_corr`` against another in a bokeh figure.

    The function is wrapped by ``interact_manual``; the defaults below are the
    widget specifications, and inside the body each argument holds the single
    value currently chosen for it.

    Parameters
    ----------
    xCol, yCol : column label
        Columns of ``sasa_corr`` plotted on the x and y axes. The choices are
        all columns except ``ResName``, ``ResID`` and ``SeqID``.
    RMSEmax : number
        Rows are kept only if both ``All_Atom.ExpDist_RMSE`` and
        ``Coarse_Grain.ExpDist_RMSE`` are strictly below this value.
    Nmin : number
        Rows are kept only if both ``All_Atom.N`` and ``Coarse_Grain.N`` are
        strictly above this value.

    Rows containing NaN in any selected column, or a non-positive value in
    ``xCol`` or ``yCol``, are dropped as well. Nothing is returned; the figure
    is displayed with ``show``.
    """
    bokeh.io.output_notebook()

    # np.unique de-duplicates the labels in case xCol/yCol coincide with each
    # other or with one of the filter columns (and returns them sorted).
    columns=list(np.unique([xCol,yCol,"ResName","ResID","SeqID",
                            "All_Atom.ExpDist_RMSE",
                            "Coarse_Grain.ExpDist_RMSE",
                            "All_Atom.N",
                            "Coarse_Grain.N"]))
    plotData=sasa_corr[columns].dropna()
    plotData=plotData[(plotData['All_Atom.ExpDist_RMSE']<RMSEmax) &
                      (plotData['Coarse_Grain.ExpDist_RMSE']<RMSEmax) &
                      (plotData['All_Atom.N']>Nmin) &
                      (plotData['Coarse_Grain.N']>Nmin)]
    plotData=plotData[(plotData[xCol]>0) & (plotData[yCol]>0)]
    source=ColumnDataSource(plotData)

    ResNames=list(plotData.ResName.unique())

    # 'height'/'width' are used instead of 'plot_height'/'plot_width', which
    # newer Bokeh releases no longer accept.
    plot_size_and_tools={'height':640,
                         'width':640,
                         'tools':['pan','wheel_zoom',
                                  'undo','redo','reset','save',
                                  'crosshair','hover']}

    p1=figure(**plot_size_and_tools)
    # One marker per remaining row, coloured by residue name.
    p1.circle(x=xCol,y=yCol,
              source=source,
              color=factor_cmap('ResName',palette=Spectral6,factors=ResNames))
    # NOTE(review): the glyph above is created without a legend label, so this
    # setting presumably has no visible effect -- confirm whether a legend
    # was intended.
    p1.legend.orientation="vertical"
    # Show every column of the plotted table in the hover tooltip.
    hover = p1.select(dict(type=HoverTool))
    hover.tooltips = [(colName,"@{"+colName+"}") for colName in plotData.columns]
    show(p1)

aW50ZXJhY3RpdmUoY2hpbGRyZW49KERyb3Bkb3duKGRlc2NyaXB0aW9uPXUneENvbCcsIG9wdGlvbnM9KCdBbGxfQXRvbS5FeHBEaXN0X01lYW4nLCAnQWxsX0F0b20uRXhwRGlzdF9STVNFJyzigKY=


In [8]:
# Collapse the table to one row per (SeqID, ResName) pair: ResID is discarded
# and every remaining column is averaged within each group after its missing
# values have been removed.
sasa_bySeq = (
    sasa_corr
    .drop(columns='ResID')
    .groupby(['SeqID', 'ResName'])
    .agg(lambda col: col.dropna().mean())
    .reset_index()
)
# Show the first rows as the cell output.
sasa_bySeq.head()

Unnamed: 0,SeqID,ResName,All_Atom.ExpDist_Mean,All_Atom.ExpDist_RMSE,All_Atom.GammaDist_Mean,All_Atom.GammaDist_RMSE,All_Atom.GammaDist_k,All_Atom.Max_ResTime,All_Atom.N,All_Atom.Total_Occupancy,Coarse_Grain.ExpDist_Mean,Coarse_Grain.ExpDist_RMSE,Coarse_Grain.GammaDist_Mean,Coarse_Grain.GammaDist_RMSE,Coarse_Grain.GammaDist_k,Coarse_Grain.Max_ResTime,Coarse_Grain.N,Coarse_Grain.Total_Occupancy,SASA
0,782,ARG,13.315556,0.243393,8.294068,0.03222,1.048774,25.666667,11.0,77.333333,12.049672,0.026987,11.841388,0.041282,2.572553,68.0,201.666667,2194.0,225.408505
1,796,ARG,14.946986,0.017608,13.65048,0.018948,0.914717,107.666667,45.666667,533.333333,6.292446,0.038368,5.860565,0.125326,9.178541,152.666667,302.0,1127.0,252.309425
2,802,ARG,19.317553,0.015481,16.802311,0.016214,0.82366,113.333333,31.0,467.0,,,,,,0.0,0.0,0.0,313.408641
3,803,ARG,34.570931,0.008934,28.37685,0.008359,0.59138,321.0,60.0,758.666667,,,,,,0.0,0.0,0.0,497.502261
4,811,LYS,9.538462,0.024822,7.386159,0.026995,0.892159,16.666667,5.0,29.0,,,,,,0.0,0.0,0.0,120.280804


In [9]:
@interact_manual
def plot_columns(xCol=sasa_corr.drop(columns=["ResName","SeqID","ResID"]).columns,
                 yCol=sasa_corr.drop(columns=["ResName","SeqID","ResID"]).columns,
                 RMSEmax=(0,1,.001),
                 Nmin=(1,100,1)):
    """Scatter one column of ``sasa_corr`` against another, without ``ResID``.

    The function is wrapped by ``interact_manual``; the defaults below are the
    widget specifications, and inside the body each argument holds the single
    value currently chosen for it. The rows are filtered on the full column
    set first, then ``ResID`` is removed from the plotted table (and hence
    from the hover tooltips).

    NOTE(review): this re-uses the name ``plot_columns`` from an earlier cell
    and reads ``sasa_corr``; the ``sasa_bySeq`` table built in the previous
    cell is not used here. Confirm whether the per-SeqID averages were meant
    to be plotted instead.

    Parameters
    ----------
    xCol, yCol : column label
        Columns of ``sasa_corr`` plotted on the x and y axes. The choices are
        all columns except ``ResName``, ``SeqID`` and ``ResID``.
    RMSEmax : number
        Rows are kept only if both ``All_Atom.ExpDist_RMSE`` and
        ``Coarse_Grain.ExpDist_RMSE`` are strictly below this value.
    Nmin : number
        Rows are kept only if both ``All_Atom.N`` and ``Coarse_Grain.N`` are
        strictly above this value.

    Rows containing NaN in any selected column, or a non-positive value in
    ``xCol`` or ``yCol``, are dropped as well. Nothing is returned; the figure
    is displayed with ``show``.
    """
    bokeh.io.output_notebook()

    # np.unique de-duplicates the labels in case xCol/yCol coincide with each
    # other or with one of the filter columns (and returns them sorted).
    columns=list(np.unique([xCol,yCol,"ResName","ResID","SeqID",
                            "All_Atom.ExpDist_RMSE",
                            "Coarse_Grain.ExpDist_RMSE",
                            "All_Atom.N",
                            "Coarse_Grain.N"]))
    tempData=sasa_corr[columns].dropna()
    tempData=tempData[(tempData[xCol]>0) &
                      (tempData[yCol]>0) &
                      (tempData['All_Atom.ExpDist_RMSE']<RMSEmax) &
                      (tempData['Coarse_Grain.ExpDist_RMSE']<RMSEmax) &
                      (tempData['All_Atom.N']>Nmin) &
                      (tempData['Coarse_Grain.N']>Nmin)]
    # tempData is already NaN-free and column-sorted, so removing ResID yields
    # the same table as re-selecting the remaining columns.
    plotData=tempData.drop(columns="ResID")
    source=ColumnDataSource(plotData)

    ResNames=list(plotData.ResName.unique())

    # 'height'/'width' are used instead of 'plot_height'/'plot_width', which
    # newer Bokeh releases no longer accept.
    plot_size_and_tools={'height':640,
                         'width':640,
                         'tools':['pan','wheel_zoom',
                                  'undo','redo','reset','save',
                                  'crosshair','hover']}

    p1=figure(**plot_size_and_tools)
    # One marker per remaining row, coloured by residue name.
    p1.circle(x=xCol,y=yCol,
              source=source,
              color=factor_cmap('ResName',palette=Spectral6,factors=ResNames))
    # NOTE(review): the glyph above is created without a legend label, so this
    # setting presumably has no visible effect -- confirm whether a legend
    # was intended.
    p1.legend.orientation="vertical"
    # Show every column of the plotted table in the hover tooltip.
    hover = p1.select(dict(type=HoverTool))
    hover.tooltips = [(colName,"@{"+colName+"}") for colName in plotData.columns]
    show(p1)

aW50ZXJhY3RpdmUoY2hpbGRyZW49KERyb3Bkb3duKGRlc2NyaXB0aW9uPXUneENvbCcsIG9wdGlvbnM9KCdBbGxfQXRvbS5FeHBEaXN0X01lYW4nLCAnQWxsX0F0b20uRXhwRGlzdF9STVNFJyzigKY=
