In [1]:
import pytraj as pt
import pytraj.utils.progress
import numpy as np
import scipy as sp
from scipy import signal
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
import collections
import sys
import gc
import os
import sklearn as skl
from sklearn import decomposition
from sklearn import metrics
from sklearn import discriminant_analysis
from sklearn import pipeline
import tqdm
import nglview as nv
import ipywidgets
import copy
import cPickle

# Preamble
This notebook will look into analyzing the effect of modulating
the lipid environment upon piezo's conformational dynamics.

There are some important differences to consider here.

First and foremost, whereas the ligand simulations affect piezo's
by directly perturbing the structure of a particular arm, these
simulations instead modulate the membrane environment surrounding piezo.

Piezo is a triskelion shaped homo-trimer. Its cryo-em structure is quite
symmetric.... however, this symmetry is not retained during simulation.

Interestingly, observing the structure of Piezo under several replicas in
symmetric, single component membrane environment reveal that one arm will
always display different conformations compared to the other arms.

The arm which does so is not the same from one replica to the next, so it
seems that this breaking of symmetry is just a consequence of acclimating
to a higher temperature membrane environment (315 K here as compared to
around 90K for cryo em).

So... over many replicas, we seem to recover the symmetry as an average
property...

Unfortunately, running many replicas is not tractable for this system due to its
sheer size... Instead, we will consider measurements from different arms as equivalent.

This was not done for the ligand simulations, however, because, as mentioned, one particular arm
was being perturbed. Thus tracking which individual arm each measurement came from was important
during analysis of ligand simulations, but here, we are able to consider average
properties over all 3 arms.

This will incur a little legwork in the initial setup but should prove helpful
during subsequent analysis because, after inducing symmetry, we will have one third as many
features to consider.

# Data Loading and Formatting

We will now begin by loading the joint simulation data and transforming to treat
measurements from each arm as equivalent. This will be done by first melting the
data frame. We can then operate on the angle labels column reassign the
residue index number as $resid_{new}=resid_{old}\bmod3$ (i.e. take the remainder
after dividing by 3). A new column can them be added track which arm each angle
belonged to. The data will then be repivoted for further analysis.

## Data loading 
As usual, we had to save the data table in chunks. We thus need to define an appropriate
loading function as shown below

In [2]:
def loadDataFrameChunks(filePathBase,nChunks):
    dataTables=[]
    chunkStr='chunk_%s0%gg'%('%',int(np.floor(np.log10(nChunks)))+1)
    with tqdm.tqdm_notebook(np.arange(nChunks)) as loadBar:
        loadBar.set_description('Loading Data Frame Chunks')
        for iChunk in loadBar:
            dataFilePath='.'.join([filePathBase,chunkStr%iChunk,'csv'])
            dataTables.append(
                pd.read_csv(dataFilePath))
            gc.collect()
    return pd.concat(dataTables)

Lets check how many chunks we have

In [4]:
!ls -l dataFiles/phiPsiTables/joint_memb.phi_psi_table.chunk_*.csv | awk -F / '{print $NF}' | awk -F . '{print $(NF-1)}' | awk -F _ '{print $NF}' | tail -n 1

41


In [5]:
rawPhiPsiTable=loadDataFrameChunks('dataFiles/phiPsiTables/joint_memb.phi_psi_table',nChunks=41)
rawPhiPsiTable.head()

HBox(children=(IntProgress(value=0, max=41), HTML(value=u'')))




Unnamed: 0,System,Frame,psi_1,phi_2,psi_2,phi_3,psi_3,phi_4,psi_4,phi_5,...,psi_4249,phi_4250,psi_4250,phi_4251,psi_4251,phi_4252,psi_4252,phi_4253,psi_4253,phi_4254
0,POPC,0,-92.538319,-90.67597,50.46789,-83.536252,-56.646704,-66.994656,-43.655675,-48.537961,...,-45.001072,-51.833152,-49.18838,-81.741315,-5.315496,-82.160726,-80.366564,-54.248897,-57.795869,-103.283576
1,POPC,1,-67.897299,-87.96851,80.827211,-84.977793,-53.47365,-67.267185,-21.730216,-73.346152,...,-57.252846,-47.089584,-56.609321,-51.742248,-42.11338,-67.250609,-62.100206,-79.93078,-63.703503,-88.860026
2,POPC,2,-54.649079,-77.358751,105.068599,-86.017091,-75.500165,-66.551154,-40.604271,-70.94598,...,-39.616951,-51.222322,-59.783692,-64.687987,-25.811014,-81.296024,-35.502232,-90.227413,-45.320649,-101.483824
3,POPC,3,-53.227823,-62.547584,65.87108,-69.612892,-49.677393,-56.152612,-45.944265,-70.275168,...,-58.491646,-41.057612,-45.783047,-73.580293,-48.073389,-60.70868,-52.913657,-78.880614,-48.355414,-108.382543
4,POPC,4,-44.386791,-75.63247,78.98452,-65.24908,-56.598326,-44.9754,-44.357276,-76.498243,...,-55.192075,-52.6673,-42.970486,-76.748131,-51.178906,-55.404238,-48.370952,-87.449109,-52.053732,-104.899646


### Data munging
We now need to transform our data as detailed above

In [6]:
phiPsiTableLong=pd.melt(frame=rawPhiPsiTable,
                        id_vars=['System','Frame'],
                        var_name='Angle',
                        value_name='Measurement')
phiPsiTableLong.head()

Unnamed: 0,System,Frame,Angle,Measurement
0,POPC,0,psi_1,-92.538319
1,POPC,1,psi_1,-67.897299
2,POPC,2,psi_1,-54.649079
3,POPC,3,psi_1,-53.227823
4,POPC,4,psi_1,-44.386791


In [11]:
tqdm.tqdm_notebook().pandas()
#Split 'Angle' column
#Add 'Angle Type' column by splitting 'Angle' on '_' and taking first entry
phiPsiTableLong['AngleType']=phiPsiTableLong['Angle'].progress_apply(
    lambda x: x.split('_')[0])
#Add 'ResID' column by splitting 'Angle' on '_' and taking second entry
phiPsiTableLong['ResID']=phiPsiTableLong['Angle'].progress_apply(
    lambda x: x.split('_')[1])
#Add 'ResNum' column as ResID%3
phiPsiTableLong['ResNum']=phiPsiTableLong['ResID'].progress_apply(
    lambda x: int(x)%3)
#Add 'Arm' column as floor(ResID/3)
phiPsiTableLong['Arm']=phiPsiTableLong['ResID'].progress_apply(
    lambda x: 'Arm_%g'%(np.floor((int(x)-1)/3)+1))
#Remove unused columns
phiPsiTableLong=phiPsiTableLong[
    ['System','Frame','Arm','ResNum','AngleType','Measurement']]
phiPsiTableLong.head()

HBox(children=(IntProgress(value=1, bar_style=u'info', max=1), HTML(value=u'')))

HBox(children=(IntProgress(value=0, max=43480500), HTML(value=u'')))

HBox(children=(IntProgress(value=0, max=43480500), HTML(value=u'')))

HBox(children=(IntProgress(value=0, max=43480500), HTML(value=u'')))

HBox(children=(IntProgress(value=0, max=43480500), HTML(value=u'')))

Unnamed: 0,System,Frame,Arm,ResNum,AngleType,Measurement
0,POPC,0,Arm_1,1,psi,-92.538319
1,POPC,1,Arm_1,1,psi,-67.897299
2,POPC,2,Arm_1,1,psi,-54.649079
3,POPC,3,Arm_1,1,psi,-53.227823
4,POPC,4,Arm_1,1,psi,-44.386791


In [25]:
testTab=phiPsiTableLong[['ResNum','AngleType']][:10]
testTab.apply(lambda x: '%s_%s'%(x[1],x[0]),axis=1)

0    psi_1
1    psi_1
2    psi_1
3    psi_1
4    psi_1
5    psi_1
6    psi_1
7    psi_1
8    psi_1
9    psi_1
dtype: object

In [None]:
phiPsiTableLong['Angle']=phiPsiTableLong[['AngleType','ResNum']].progress_apply(
    lambda x: '%s_%s'%(x[0],x[1]),axis=1)
phiPsiTableLong=phiPsiTableLong[
    
]