In [1]:
import numpy as np
import scipy as sp
import pandas as pd

import os

import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns

import ipywidgets as widgets
from ipywidgets import interact, interact_manual

import bokeh as bk
from bokeh.plotting import figure, output_notebook, output_file, reset_output, show, ColumnDataSource
from bokeh.models import LinearColorMapper, ColorBar, FuncTickFormatter
# Clear any previously configured Bokeh output state, then route subsequent
# show() calls to render inline in the notebook.
reset_output()
output_notebook()

In [2]:
##If you are using google colab uncomment the lines below and run this cell:
#!git clone https://github.com/wesleymsmith/Piezo_State_Contact_Delta_Viewer.git dataFiles
#!for csvFile in dataFiles/*csv.chunk*; do ln -s $csvFile ./;done
#!for csvFile in dataFiles/*.csv; do ln -s $csvFile ./;done
#!git clone https://github.com/LynaLuo-Lab/network_analysis_scripts.git
#!ln -s network_analysis_scripts/python_version/correlation_data_utilities.py ./
#!ln -s dataFiles/ReciprocalDistancePCAprojections.txt ./

In [2]:
import correlation_data_utilities

In [4]:
# --- Interactive heat map of the per-pair state contact delta ---
# System size constants (residue count and chain count).
nRes=4254
nChains=3
resPerChain=nRes/nChains

# NOTE(review): this formatter is never attached to an axis in this cell, and its
# body is written Python-style although `code=` is normally JavaScript -- confirm
# it is still needed before relying on it.
tickFormatter=FuncTickFormatter(code="""
    def ticker():
        return "{:g}"
""")

tempFrame=pd.read_csv('Piezo_State_Contact_Delta_Frame.csv')
valueColName='DeltaMean'

# Symmetric color limits: clip at the smaller of |min| and |max| so that zero
# sits at the center of the diverging palette.
vclip=np.min([np.abs(tempFrame[valueColName].min()),
              np.abs(tempFrame[valueColName].max())])

mapper=LinearColorMapper(palette=sns.color_palette("coolwarm", n_colors=256).as_hex(),
                         low=-vclip,
                         high=vclip)

heatMapColumnSource=ColumnDataSource(tempFrame)

# Wrap each field name in braces ("@{name}") so that column names containing
# spaces or punctuation (e.g. "Unnamed: 0") still resolve in the hover tool.
toolTips=[(colName,"@{{{}}}".format(colName)) for colName in tempFrame.columns]

plotWidth=800
plotHeight=800
p=bk.plotting.figure(plot_width=plotWidth,plot_height=plotHeight,
            tooltips=toolTips)

p.background_fill_color="black"
p.background_fill_alpha=.75

color_bar=ColorBar(color_mapper=mapper,location='top_left')

# One unit square per (X, Y) entry, colored by the value column through the mapper.
p.rect(x='X',y='Y',
       width=1,height=1,
       source=heatMapColumnSource,
       fill_color={'field':valueColName,'transform':mapper},line_color=None)
p.add_layout(color_bar)
show(p)

# View Results from Reciprocal Pair Distance PCA

This data was constructed by performing principal component analysis over the reciprocal distance timeseries for all collision pairs. The importance of each pair is calculated as the RMS of that pair's component value over the top 97 principal components (which account for 90% of the total variance). I.e., this importance is based on the fluctuation of reciprocal distances, so nearby residues will tend to be weighted as more important.

## Visualize PCA clusters
The first two cells will load the PCA projection data and visualize component pair projections in 2D. This can provide an idea of how PCA clusters the two states. The plot will be colored by time, with the closed state plotted as dots and the open state as x's.

In [6]:
X_pca=np.loadtxt("ReciprocalDistancePCAprojections.txt")

In [15]:
@interact
def plot_rDist_PCA_2D(Xind=widgets.BoundedIntText(min=0,max=X_pca.shape[1]-1,value=0),
                      Yind=widgets.BoundedIntText(min=0,max=X_pca.shape[1]-1,value=1)):
    """Scatter the PCA projections for two selected components, colored by frame.

    Rows 0-99 of ``X_pca`` are drawn as dots and labelled 'Closed'; rows 100-200
    are drawn as x's and labelled 'Open'.

    Parameters
    ----------
    Xind : int
        Column of ``X_pca`` plotted on the x axis.
    Yind : int
        Column of ``X_pca`` plotted on the y axis.

    The selectors are bounded to valid column indices (0 .. n_components-1) so an
    out-of-range entry cannot raise an IndexError.
    """
    nClosedFrames=100
    nFrames=201
    plt.figure(figsize=(9,9))

    stateStyles=(('Closed','.',np.arange(nClosedFrames)),
                 ('Open','x',np.arange(nClosedFrames,nFrames)))
    for stateLabel,stateMarker,frameInds in stateStyles:
        # Color by frame index through a shared colormap/normalization so that
        # the colorbar drawn below actually corresponds to the point colors.
        plt.scatter(x=X_pca[frameInds,Xind],y=X_pca[frameInds,Yind],
                    c=frameInds,cmap='viridis',vmin=0,vmax=nFrames-1,
                    s=12,marker=stateMarker,label=stateLabel)
    plt.xlabel('PCA mode %g'%Xind)
    plt.ylabel('PCA mode %g'%Yind)
    plt.colorbar(label='Time')
    plt.legend()
    plt.show()

interactive(children=(IntText(value=0, description='Xind'), IntText(value=1, description='Yind'), Output()), _…

## Load top 10k significant pairs and visualize pair significance matrix

One way to get an idea of which pairs are most important is by examining the 'components' from PCA.
Here each component is a vector with length equal to the number of residue pairs that was analyzed.
Each entry of the component represents a signed fluctuation that the corresponding residue pair is contributing.
(As a note, these components are essentially normalized eigenvectors of the covariance matrix for the reciprocal distances between the set of residue pairs considered)

Taking the root mean square of contributions from a given entry yields a measure of the importance of that residue pair. We can then rank residue pairs using this importance metric. Here, we saved only the top ten thousand out of the approximately 1.25 million pairs (roughly the top 1%).

The results can be viewed using the interactive matrix plot. As before, hovering over a plot element will produce a popup which will display the corresponding data entry from the table.

In addition to importance, a number of other features are provided, such as the mean distance and reciprocal distance.

In [3]:
# Read the table of top-ranked residue pairs and preview the ten best-ranked rows.
top10000pair_infoTable=pd.read_csv(
    'top10000_ReciprocalDistancePCA_pairs.csv'
)
top10000pair_infoTable.sort_values(by='Rank').head(10)

Unnamed: 0,X,Y,X_SeqInd,Y_SeqInd,Importance,Rank,closed_ReciprocalDistance_Mean,closed_ReciprocalDistance_Std,closed_Distance_Mean,closed_Distance_Std,open_ReciprocalDistance_Mean,open_ReciprocalDistance_Std,open_Distance_Mean,open_Distance_Std
6770,352,349,1134,1131,0.170284,1.0,0.283671,0.160847,4.443885,1.690809,0.420036,0.173206,2.996702,1.562802
6675,349,352,1131,1134,0.170284,1.0,0.283671,0.160847,4.443885,1.690809,0.420036,0.173206,2.996702,1.562802
2517,1903,1899,1267,1263,0.142832,2.0,0.43734,0.140198,2.755172,1.57354,0.444669,0.157977,2.715097,1.38304
2491,1899,1903,1263,1267,0.142832,2.0,0.43734,0.140198,2.755172,1.57354,0.444669,0.157977,2.715097,1.38304
8959,495,474,1277,1256,0.142663,3.0,0.509566,0.126601,2.161808,0.831112,0.398792,0.201105,3.555907,2.165605
8858,474,495,1256,1277,0.142663,3.0,0.509566,0.126601,2.161808,0.831112,0.398792,0.201105,3.555907,2.165605
9016,528,420,1310,1202,0.139563,4.0,0.43231,0.169096,2.800672,1.293648,0.238021,0.135154,5.092263,1.736524
8618,420,528,1202,1310,0.139563,4.0,0.43231,0.169096,2.800672,1.293648,0.238021,0.135154,5.092263,1.736524
2565,1913,1860,1277,1224,0.134813,5.0,0.43169,0.174209,2.838161,1.331105,0.544737,0.0815,1.890793,0.376361
2364,1860,1913,1224,1277,0.134813,5.0,0.43169,0.174209,2.838161,1.331105,0.544737,0.0815,1.890793,0.376361


In [5]:
@interact_manual
def plot_topPairs(rankCut=widgets.IntSlider(
        min=1,
        max=top10000pair_infoTable.Rank.max(),
        value=100
    )):
    """Draw a heat map of all pairs whose Rank is at most ``rankCut``.

    Parameters
    ----------
    rankCut : int
        Largest Rank value to include in the plot.
    """
    withinCut=top10000pair_infoTable.Rank<=rankCut
    rankedPairs=top10000pair_infoTable[withinCut].copy()
    # Reverse the palette order before handing it to the heat-map helper.
    reversedViridis=np.flip(sns.color_palette("viridis", n_colors=256).as_hex())
    correlation_data_utilities.bokeh_dataTable_heatMap(
        plotData=rankedPairs,
        Xcol='X',
        Ycol='Y',
        dataCol='Rank',
        width=640,
        height=640,
        rectheight=1,
        rectwidth=1,
        colorMap=reversedViridis,
        title=None,
        xlabel=None,
        ylabel=None,
        axisFontSize="14pt",
        vmin=None,
        vmax=None)

interactive(children=(IntSlider(value=100, description='rankCut', max=5000, min=1), Button(description='Run In…

# Generate table of potential disulfide linkage residue pairs

We seek to filter down our top importance residue pairs by taking only residues which are close together in the closed state, but move far apart in the open state.

The first cell below will allow you to pick a cutoff for the maximum allowed distance between residue pairs in the closed state (maxCut) and the minimum allowed increase in distance from the closed to the open state (deltaCut).
It will print out the top 100 pairs (though there may be more), along with a plot of the filtered selection.

The subsequent cell will allow you to save this data to disk

The last cell will allow you to generate an interactive matrix plot

In [4]:
@interact
def sigTab(
    maxCut=widgets.FloatSlider(min=0,max=10,value=4.0),
    deltaCut=widgets.FloatSlider(min=0,max=10,value=3.0)):
    """Filter the top pairs by closed-state distance and closed-to-open increase.

    Prints the shape of the filtered table, displays up to 100 of its rows sorted
    by descending Importance, and draws a scatter plot of closed vs. open mean
    distance with the selected pairs highlighted.

    Parameters
    ----------
    maxCut : float
        Largest allowed 'closed_Distance_Mean' for a pair to be selected.
    deltaCut : float
        Selected pairs must have
        'open_Distance_Mean' - 'closed_Distance_Mean' strictly greater than this.
    """
    closedDist=top10000pair_infoTable['closed_Distance_Mean']
    openDist=top10000pair_infoTable['open_Distance_Mean']
    isCandidate=(closedDist<=maxCut) & ((openDist-closedDist)>deltaCut)
    sigDeltaTable=top10000pair_infoTable[isCandidate]
    print(sigDeltaTable.shape)
    # Lift the row limit only for this display instead of changing the global
    # pandas option for every later cell.
    with pd.option_context("display.max_rows",None):
        display(sigDeltaTable.sort_values('Importance',ascending=False).head(n=100))
    # Label a copy: the shared table must not gain or overwrite a 'Label' column
    # each time a slider moves, and chained assignment
    # (frame['Label'][index]=...) is unreliable.
    plotTab=top10000pair_infoTable.copy()
    plotTab['Label']=np.where(isCandidate,'DiSulfideTarget','NonSignificant')
    sns.scatterplot(x='closed_Distance_Mean',
                    y='open_Distance_Mean',
                    hue='Label',alpha=.5,
                    data=plotTab)
    plt.show()

interactive(children=(FloatSlider(value=4.0, description='minCut', max=10.0), FloatSlider(value=2.0, descripti…

In [None]:
# NOTE: despite its name, `minCut` is an upper bound (the interactive cell calls
# the same threshold `maxCut`); the name is kept so existing references still work.
minCut=4.0 #maximum allowed mean distance in closed state
deltaCut=3.0 #minimum allowed distance increase from closed to open state
disulfideCandidatesTable=top10000pair_infoTable[((top10000pair_infoTable['closed_Distance_Mean']<=minCut) & \
    ((top10000pair_infoTable['open_Distance_Mean']- \
     top10000pair_infoTable['closed_Distance_Mean'])>deltaCut))]
# Store the closed-to-open distance increase explicitly: the heat-map cell that
# re-loads this file plots the 'Delta_Distance' column, so it must exist in the CSV.
disulfideCandidatesTable=disulfideCandidatesTable.assign(
    Delta_Distance=lambda tab: tab['open_Distance_Mean']-tab['closed_Distance_Mean'])
disulfideCandidatesTable.to_csv("Disulfide_Linkage_Candidates.csv",index=False)

In [8]:
# Re-load the saved candidate table from disk and preview its first five rows.
disulfideCandidatesTable=pd.read_csv(
    "Disulfide_Linkage_Candidates.csv"
)
disulfideCandidatesTable.head(n=5)

Unnamed: 0.1,Unnamed: 0,X,Y,X_SeqInd,Y_SeqInd,Importance,Rank,closed_ReciprocalDistance_Mean,closed_ReciprocalDistance_Std,closed_Distance_Mean,closed_Distance_Std,open_ReciprocalDistance_Mean,open_ReciprocalDistance_Std,open_Distance_Mean,open_Distance_Std,Color,Label,alpha,Delta_Distance
0,42,1017,1393,2146,2522,0.047363,4769.0,0.561049,0.070354,1.823062,0.33309,0.197936,0.034995,5.213034,0.918645,DiSulfideTarget,DiSulfideTarget,0.75,3.389972
1,43,1017,1394,2146,2523,0.048043,4491.0,0.351193,0.071958,2.989168,0.71789,0.144072,0.014034,7.012239,0.737165,DiSulfideTarget,DiSulfideTarget,0.75,4.023071
2,122,1040,1369,2169,2498,0.074485,482.0,0.417565,0.108187,2.607278,0.896595,0.186426,0.061404,5.742409,1.280145,DiSulfideTarget,DiSulfideTarget,0.75,3.135131
3,142,1046,985,2175,2114,0.100263,77.0,0.424628,0.164778,3.00106,1.75567,0.113139,0.012293,8.944383,0.979418,DiSulfideTarget,DiSulfideTarget,0.75,5.943323
4,148,1049,1053,2178,2182,0.0723,565.0,0.341402,0.097817,3.267708,1.238794,0.126631,0.020656,8.073936,1.120094,DiSulfideTarget,DiSulfideTarget,0.75,4.806228


In [9]:
# Heat map of the candidate pairs, colored by the 'Delta_Distance' column.
correlation_data_utilities.bokeh_dataTable_heatMap(
    plotData=disulfideCandidatesTable,
    Xcol='X',
    Ycol='Y',
    dataCol='Delta_Distance',
    colorMap=sns.color_palette("plasma", n_colors=256).as_hex(),
    width=640,
    height=640,
    rectwidth=1,
    rectheight=1,
    vmin=None,
    vmax=None,
    axisFontSize="14pt",
    title=None,
    xlabel=None,
    ylabel=None)

[('index', '@{index}'), ('Unnamed: 0', '@{Unnamed: 0}'), ('X', '@{X}'), ('Y', '@{Y}'), ('X_SeqInd', '@{X_SeqInd}'), ('Y_SeqInd', '@{Y_SeqInd}'), ('Importance', '@{Importance}'), ('Rank', '@{Rank}'), ('closed_ReciprocalDistance_Mean', '@{closed_ReciprocalDistance_Mean}'), ('closed_ReciprocalDistance_Std', '@{closed_ReciprocalDistance_Std}'), ('closed_Distance_Mean', '@{closed_Distance_Mean}'), ('closed_Distance_Std', '@{closed_Distance_Std}'), ('open_ReciprocalDistance_Mean', '@{open_ReciprocalDistance_Mean}'), ('open_ReciprocalDistance_Std', '@{open_ReciprocalDistance_Std}'), ('open_Distance_Mean', '@{open_Distance_Mean}'), ('open_Distance_Std', '@{open_Distance_Std}'), ('Color', '@{Color}'), ('Label', '@{Label}'), ('alpha', '@{alpha}'), ('Delta_Distance', '@{Delta_Distance}'), ('ColorWeight', '@{ColorWeight}')]
