This notebook allows interactive side-by-side plotting of 2D grid data. Be sure to upload the 'dataFiles' folder first if it is not already present alongside the notebook.

In [1]:
import numpy as np
import scipy as sp
import matplotlib
from matplotlib import pyplot as plt
import collections
import sys
import gc
import os
import sklearn as skl
from sklearn import decomposition
from sklearn import metrics
from sklearn import discriminant_analysis
from sklearn import cluster
import tqdm
import ipywidgets
import copy

from ipywidgets import interact, interactive, fixed, interact_manual, interactive_output
import ipywidgets as widgets

# Loading
This cell will look for files in the directory specified by the 'dataDir' variable

It will load any files which contain the keyword 'mean' in their name using numpy's 'loadtxt' command. The loader assumes that the files will be named according to the scheme:

SYSTEMNAME.meanPROPERTYNAME.txt

These files should contain 2D arrays of data with one row of data per line.
They will be stored in a dictionary object with entry keys 'SYSTEMNAME.PROPERTYNAME'

The loader will then create an array of the unique 'SYSTEMNAME' values found in the dictionary of loaded data arrays.

For each unique 'SYSTEMNAME' it will then attempt to load the files 'SYSTEMNAME.gridX.npy' and 'SYSTEMNAME.gridY.npy' from the data file directory.

Thus each entry of a 2D data array should be the property value at the x and y coordinates stored in the corresponding row and column of gridX and gridY, respectively.

This notebook is geared at plotting height maps for a lipid bilayer using the average density over both leaflets to plot a contour / mask.

Any manner of 2D grid quantities could be loaded and plotted, however, provided that corresponding gridX and gridY arrays are present.
Similarly the density files need not be present, but contour plotting will generate an error if they are absent.

In [3]:
# Directory searched for the 'SYSTEMNAME.meanPROPERTYNAME.txt' property maps
# and the 'SYSTEMNAME.gridX.npy' / 'SYSTEMNAME.gridY.npy' coordinate grids.
dataDir='dataFiles'

# os.listdir returns entries in arbitrary order; sorting makes the printed
# report (and the load order) identical on every run.
fileList=sorted(os.listdir(dataDir))

meanDataFiles=[dataFile for dataFile in fileList if 'mean' in dataFile]

# NOTE: every print below passes one pre-formatted string, so the cell emits
# the same text under both Python 2 and Python 3.
print('The following mean data files were located: %s'%(meanDataFiles,))
print('')
print('---- ---- ---- ----')
print('')
meanDataDict={}
print('loading mean data files:')
for meanDataFile in meanDataFiles:
    nameFields=meanDataFile.split('.')
    if len(nameFields)<2:
        # Name does not follow SYSTEMNAME.meanPROPERTYNAME.txt, so no
        # 'SYSTEMNAME.PROPERTYNAME' key can be derived from it.
        print('skipping %s (name does not match SYSTEMNAME.meanPROPERTYNAME.txt)'%meanDataFile)
        continue
    # Dictionary key is 'SYSTEMNAME.PROPERTYNAME' (the 'mean' prefix is dropped).
    entryName=nameFields[0]+'.'+nameFields[1].replace('mean','')
    #the data files seem to be saved rotated and transposed relative to the X and Y
    #coordinate grids, so transpose and then rotate by 180 degrees.
    meanDataDict[entryName]=np.rot90(
        np.loadtxt(os.path.join(dataDir,meanDataFile)).T,2)
    print('%-18s [%g x %g; value range=(%8f,%8f)]'%(
            entryName,
            meanDataDict[entryName].shape[0],meanDataDict[entryName].shape[1],
            np.min(meanDataDict[entryName]),np.max(meanDataDict[entryName])))
print('')
print('---- ---- ---- ----')

# Unique system names / property names present among the loaded keys.
systems=np.unique([entry.split('.')[0] for entry in meanDataDict.keys()])
print('System names: %s'%(systems,))
dataTypes=np.unique(
    [entry.split('.')[1] for entry in meanDataDict.keys()])
print('Mean Data Set types: %s'%(dataTypes,))

print('')
print('---- ---- ---- ----')
print('')

print('loading X and Y grids:')
gridDict={}
for system in systems:
    for gridType in ['gridX','gridY']:
        gridName='.'.join([system,gridType])
        gridFileName='.'.join([gridName,'npy'])
        gridPath=os.path.join(dataDir,gridFileName)
        gridDict[gridName]=np.load(gridPath)
        print('%-18s [%g x %g; coordinate range=(%8.3f,%8.3f)]'%(
            gridName,
            gridDict[gridName].shape[0],gridDict[gridName].shape[1],
            np.min(gridDict[gridName]),np.max(gridDict[gridName])))
print('')
print('---- ---- ---- ----')
print('')

# list(...) is required on Python 3, where dict.keys() is a view that
# np.sort cannot sort directly.
print('Mean Data Dictionary Keys: %s'%(np.sort(list(meanDataDict.keys())),))

print('Grid Dictionary Keys: %s'%(np.sort(list(gridDict.keys())),))

The following mean data files were located: ['PIP2.meanUpperHeight.txt', 'PIP2.meanUpperDensity.txt', 'POPC.meanUpperDensity.txt', 'POPS.meanUpperDensity.txt', 'PIP2.meanLowerHeight.txt', 'POPS.meanLowerDensity.txt', 'POPS.meanUpperHeight.txt', 'POPC.meanUpperHeight.txt', 'POPC.meanLowerDensity.txt', 'POPS.meanLowerHeight.txt', 'PIP2.meanLowerDensity.txt', 'POPC.meanLowerHeight.txt']

---- ---- ---- ----

loading mean data files:
PIP2.UpperHeight   [235 x 233; value range=(97.908477,109.895698)]
PIP2.UpperDensity  [235 x 233; value range=(0.000000,0.000045)]
POPC.UpperDensity  [230 x 229; value range=(0.000000,0.000048)]
POPS.UpperDensity  [232 x 233; value range=(0.000000,0.000037)]
PIP2.LowerHeight   [235 x 233; value range=(61.440388,72.951450)]
POPS.LowerDensity  [232 x 233; value range=(0.000000,0.000056)]
POPS.UpperHeight   [232 x 233; value range=(95.570330,106.419144)]
POPC.UpperHeight   [230 x 229; value range=(101.543684,112.623104)]
POPC.LowerDensity  [230 x 229; value range

## Compute 'thickness' and 'averageDensity'

The next cells assume that among the 'PROPERTYNAME' entries loaded, each system has a corresponding 'LowerDensity', 'UpperDensity', 'LowerHeight' and 'UpperHeight' from which it will compute an 'AverageDensity' and 'Thickness' respectively.

There is a safety check in place so it will not attempt to calculate thickness if one or both height maps are missing.

Likewise if one or both density maps is missing for a system, it will skip that system. In this case, however, a warning will be generated since this will create errors when trying to plot density contours.

In [4]:
# Derive two extra maps per system from the loaded property maps:
#   SYSTEM.Thickness      = UpperHeight - LowerHeight
#   SYSTEM.AverageDensity = (UpperDensity + LowerDensity) / 2
# Each is only computed when both of its inputs were loaded.
for system in systems:
    upperHeightName='.'.join([system,'UpperHeight'])
    lowerHeightName='.'.join([system,'LowerHeight'])
    upperDensityName='.'.join([system,'UpperDensity'])
    lowerDensityName='.'.join([system,'LowerDensity'])

    if upperHeightName in meanDataDict and lowerHeightName in meanDataDict:
        meanDataDict['.'.join([system,'Thickness'])]=(
            meanDataDict[upperHeightName]-meanDataDict[lowerHeightName])

    if upperDensityName in meanDataDict and lowerDensityName in meanDataDict:
        # Only the average is stored here. The previous version also wrote
        # UpperDensity - LowerDensity into the 'Thickness' entry, which
        # clobbered the thickness map computed just above.
        meanDataDict['.'.join([system,'AverageDensity'])]=(
            meanDataDict[upperDensityName]+meanDataDict[lowerDensityName])/2.0
    else:
        print("WARNING! Density upper and / or lower density data set was missing for %s"%(
            system))
        print("         Contour plotting will not function properly for this system.")

# Refresh the list of property names so it includes the derived maps.
dataTypes=np.unique(
    [entry.split('.')[1] for entry in meanDataDict.keys()])
print('mean data types: %s'%(np.sort(dataTypes),))

mean data types: ['AverageDensity' 'LowerDensity' 'LowerHeight' 'Thickness' 'UpperDensity'
 'UpperHeight']


# Visualize Data

This cell will automatically generate interactive plotting of the data grids loaded.

It assumes that each system has corresponding 'gridX' and 'gridY' arrays loaded into the 'gridDict' dictionary. In addition, an 'AverageDensity' entry must be present in the 'meanDataDict' dictionary for any system for which contour / mask plotting is desired.

Widgets are provided to allow side-by-side plotting of any two systems for the selected data type. Controls are also provided to turn density contour / mask plotting on or off and to set the density value at which the contour is drawn.

In [108]:
# --- selection menus: which two systems to compare and which property map ---
system1Menu=widgets.Dropdown(
    options=systems,
    description="System 1")
system2Menu=widgets.Dropdown(
    options=systems,
    description="System 2")
propertyMenu=widgets.Dropdown(
    options=dataTypes,
    description="Data Set")

# --- density contour / mask controls ---
contourToggleButton=widgets.ToggleButton(description="ToggleContour")
# Opacity of the filled mask drawn below the contour value.
maskAlphaValue=widgets.FloatSlider(
    value=1.0,
    min=0,
    max=1,
    step=.0005,
    description="MaskOpacity")
# min / max / step are passed through to FloatLogSlider unchanged; the
# widget's value starts at 1e-5.
densityContourValue=widgets.FloatLogSlider(
    value=1e-5,
    min=-12,
    max=0.0,
    step=.25,
    description="ContourValue")

def renderDataGrid(
        coordinateGrids,
        dataGridName,dataGrid,
        dataRange,ax):
    """Draw one data grid as a pseudocolor mesh on the given axes.

    coordinateGrids -- indexable; element 0 is the X grid, element 1 the Y grid
    dataGridName    -- text used as the axes title
    dataGrid        -- values to color the mesh with
    dataRange       -- indexable; element 0 / 1 are the lower / upper color limits
    ax              -- axes object providing pcolormesh() and set_title()

    Returns whatever ax.pcolormesh() returns.
    """
    xGrid=coordinateGrids[0]
    yGrid=coordinateGrids[1]
    lowerLimit=dataRange[0]
    upperLimit=dataRange[1]
    meshPlot=ax.pcolormesh(xGrid,yGrid,dataGrid,
                           vmin=lowerLimit,vmax=upperLimit)
    ax.set_title(dataGridName)
    return meshPlot

def renderDensityContour(
        coordinateGrids,
        densityGrid,
        densityContourValue,
        maskAlpha,ax):
    """Overlay a density contour line and a filled mask on the given axes.

    Both overlays use the levels [0, densityContourValue]: the contour lines
    are drawn in '#ff00aa' and the band between the two levels is filled in
    '#000000' with opacity maskAlpha.

    coordinateGrids     -- indexable; element 0 is the X grid, element 1 the Y grid
    densityGrid         -- density values to contour
    densityContourValue -- upper contour level
    maskAlpha           -- alpha passed to the filled contour
    ax                  -- axes object providing contour() and contourf()

    Returns the tuple (result of ax.contour, result of ax.contourf).
    """
    xGrid=coordinateGrids[0]
    yGrid=coordinateGrids[1]

    outlinePlot=ax.contour(
        xGrid,yGrid,densityGrid,
        levels=[0,densityContourValue],
        colors=['#ff00aa'])
    maskPlot=ax.contourf(
        xGrid,yGrid,densityGrid,
        levels=[0,densityContourValue],
        colors=['#000000'],
        alpha=maskAlpha)
    return (outlinePlot,maskPlot)

def show_heatmap(system1,system2,propertyType,
                 contourToggle,densityContourValue,
                 maskAlphaValue):
    """Plot one property map for two systems side by side on a shared color scale.

    Reads the coordinate grids from the module-level 'gridDict' and the data
    grids from the module-level 'meanDataDict', prints a short summary of
    what is being plotted, and draws a 1 x 2 figure with one colorbar per
    panel.

    system1, system2    -- system names; 'SYSTEM.gridX', 'SYSTEM.gridY' and
                           'SYSTEM.<propertyType>' must exist (KeyError otherwise)
    propertyType        -- property name to plot, e.g. 'Thickness'
    contourToggle       -- when true, overlay the density contour / mask
    densityContourValue -- density level of the contour
    maskAlphaValue      -- opacity of the filled mask

    'SYSTEM.AverageDensity' is only needed for the overlay. A system without
    it is still plotted; when the overlay is requested, a warning is printed
    and the overlay is skipped for that system.
    """
    selectedSystems=[system1,system2]

    # Single-argument print calls behave the same on Python 2 and Python 3.
    print('system1:%s'%system1)
    print('system2:%s'%system2)
    print('dataGridType:%s'%propertyType)
    print('showDensityContour:%s'%contourToggle)
    print('densityContourValue:%.3e'%densityContourValue)

    # ((sys1.gridX, sys1.gridY), (sys2.gridX, sys2.gridY))
    coordinateGridNames=tuple(
        tuple('.'.join([system,gridType])
              for gridType in ['gridX','gridY'])
        for system in selectedSystems)
    print('coordinate grids: %s'%(coordinateGridNames,))
    coordinateGrids=tuple(
        tuple(gridDict[coordinateGridName] for coordinateGridName in gridSet)
        for gridSet in coordinateGridNames)

    dataGridNames=tuple('.'.join([system,propertyType])
                        for system in selectedSystems)
    print('data grids: %s'%(dataGridNames,))
    dataGrids=tuple(meanDataDict[dataGridName]
                    for dataGridName in dataGridNames)
    # Common color limits so the two panels are directly comparable.
    dataRange=(np.min([np.min(dataGrid) for dataGrid in dataGrids]),
               np.max([np.max(dataGrid) for dataGrid in dataGrids]))
    print('data range: %s'%(dataRange,))

    contourGridNames=tuple('.'.join([system,'AverageDensity'])
                           for system in selectedSystems)
    print('contour grids: %s'%(contourGridNames,))
    # .get() rather than [] so that a system without density data does not
    # abort the whole plot; missing entries are None.
    contourGrids=tuple(meanDataDict.get(contourGridName)
                       for contourGridName in contourGridNames)
    loadedContourGrids=[contourGrid for contourGrid in contourGrids
                        if contourGrid is not None]
    if loadedContourGrids:
        contourRange=(np.min([np.min(contourGrid)
                              for contourGrid in loadedContourGrids]),
                      np.max([np.max(contourGrid)
                              for contourGrid in loadedContourGrids]))
        print('contour range: %s'%(contourRange,))

    plotFig,plotAxs=plt.subplots(1,2)
    plotFig.set_figwidth(12)
    plotFig.set_figheight(5)
    shrinkVal=1.0 #in case we need to resize colorbar
    for iSys,system in enumerate(selectedSystems):
        ax=plotAxs.flat[iSys]
        dPlot=renderDataGrid(
                coordinateGrids[iSys],
                dataGridNames[iSys],
                dataGrids[iSys],
                dataRange,ax)
        if contourToggle:
            if contourGrids[iSys] is None:
                print('WARNING! no AverageDensity data for %s; density contour skipped.'%system)
            else:
                renderDensityContour(
                    coordinateGrids[iSys],
                    contourGrids[iSys],
                    densityContourValue,
                    maskAlphaValue,ax)
        plt.colorbar(dPlot,ax=ax,shrink=shrinkVal)
    plt.tight_layout()
    
# Lay out the controls: contour controls on top, then the data-set menu,
# then the two system menus.
contourPannel=widgets.HBox(
    [contourToggleButton,densityContourValue,maskAlphaValue])
systemSelectPannel=widgets.HBox(
    [system1Menu,system2Menu])
controlPannel=widgets.VBox(
    [contourPannel,propertyMenu,systemSelectPannel])

# Each key is a show_heatmap keyword argument; its widget supplies the value.
controlPannelDict=dict(
    system1=system1Menu,
    system2=system2Menu,
    propertyType=propertyMenu,
    contourToggle=contourToggleButton,
    densityContourValue=densityContourValue,
    maskAlphaValue=maskAlphaValue)

# Bind the widgets to the plotting function and show the controls together
# with the output area.
dispOut=interactive_output(show_heatmap,controlPannelDict)
display(controlPannel,dispOut)

VkJveChjaGlsZHJlbj0oSEJveChjaGlsZHJlbj0oVG9nZ2xlQnV0dG9uKHZhbHVlPUZhbHNlLCBkZXNjcmlwdGlvbj11J1RvZ2dsZUNvbnRvdXInKSwgRmxvYXRMb2dTbGlkZXIodmFsdWU9MWXigKY=


Output()