# 2D Visualization of 3D Data
## Zachary Neronha, Wong Lab
### 26 October 2018, created for the purpose of simple viewing of data in two dimensions 

### Workflow Overview
- Data should first be converted from Imaris files to the XLS folders in the convert folder tab
- This code allows for the user to easily visualize and restrict the data of interst
- Data must then be reexported to CSV for reading by the R codes for TDA to be performed

### Import packages and creater helper functions to streamline analysis later on

In [1]:
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.cm as cm
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
%matplotlib inline
from __future__ import division
init_notebook_mode(connected=True)
import plotly.plotly as py
import plotly
plotly.tools.set_credentials_file(username='zjneronha', api_key='PMrKGUGAet6kUZ5Rcf5E')
import brewer2mpl as cbrew

In [2]:
#Create helper functions

#function 1: return the cells within a certain location
def findRange(xPts,yPts,xRange,yRange):
    cellList = []
    for ii in range(0,np.size(xPts,0)):
        qLog = np.greater(xPts[ii,:],xRange[0]) & np.less(xPts[ii,:],xRange[1])
        pLog = np.greater(yPts[ii,:],yRange[0]) & np.less(yPts[ii,:],yRange[1])
        qLog = sum(qLog)
        pLog = sum(pLog)

        if qLog > 0 and pLog >0:
            cellList.append(ii)
    
    return cellList

#function 2: pull and export only the cells of interest (list the cells you want)
def selExport(xPts,yPts,zPts,MyCells,pathname):
    selX = xPts[MyCells,:]
    selY = yPts[MyCells,:]
    selZ = zPts[MyCells,:]
    
    ptsSZE = sum(sum(np.logical_not(np.isnan(selX))))
    selPTS = np.empty((ptsSZE,3))
    loopcounter = 0
    
    #convert back to direct matrix format and export
    for ii in range(0,np.size(selX,0)):
        for jj in range(0,np.size(selX,1)):
            if np.isnan(selX[ii,jj])==0:
                selPTS[loopcounter,:] = [selX[ii,jj],selY[ii,jj],selZ[ii,jj]]
                loopcounter = loopcounter + 1
                
    dTPS = pd.DataFrame(selPTS)
    dTPS.to_csv(pathname,index=False,header = False)
    
    print("Feature Complete; Cluster AllTemp Pts File Saved!")

#function 3: pull out and export all points at one time frame (all cells from one frame)
def temExport(xPts,yPts,zPts,Frame,pathname):
    selX = xPts[:,Frame]
    selY = yPts[:,Frame]
    selZ = zPts[:,Frame]
    
    ptsSZE = sum(np.logical_not(np.isnan(selX)))
    selPTS = np.empty((ptsSZE,3))
    loopcounter = 0
    
    #convert back to direct matrix format and export
    for ii in range(0,np.size(selX,0)):
        if np.isnan(selX[ii])==0:
            selPTS[loopcounter,:] = [selX[ii],selY[ii],selZ[ii]]
            loopcounter = loopcounter + 1
                
    dTPS = pd.DataFrame(selPTS)
    dTPS.to_csv(pathname,index=False,header = False)
    
    print("Feature Complete; Cluster SingTemp Pts File Saved!")

### Import position data, calculate velocity data
- Load data for a set well at a specified imaging interval (fixed based on microscopy data)
- Also pull out velocity data based on positions

In [41]:
currentwell = 19
interval = (1/3) #hours between imaging 

#import data from csv
xlsZ = pd.ExcelFile("NuclearTracking/ConvertedData/w"+repr(currentwell)+"_combineddata.xls")
dxx = pd.read_excel(xlsZ,'xStore')
dyy = pd.read_excel(xlsZ,'yStore')
dzz = pd.read_excel(xlsZ,'zStore')

# conver to an array
xDat = np.array(dxx)
yDat = np.array(dyy)
zDat = np.array(dzz)

#Initialize and fill velocity arrays 
xVel = np.empty((np.size(xDat,0),np.size(xDat,1)))
yVel = np.empty((np.size(xDat,0),np.size(xDat,1)))
zVel = np.empty((np.size(xDat,0),np.size(xDat,1)))
xVel[:] = np.nan
yVel[:] = np.nan
zVel[:] = np.nan

#fill sequentially by time points
for k in range(0,np.size(xDat,1)-1):
    xVel[:,k] = xDat[:,k+1]-xDat[:,k]
    yVel[:,k] = yDat[:,k+1]-yDat[:,k]
    zVel[:,k] = zDat[:,k+1]-zDat[:,k]

### Plot the data temporally
- Note each color corresponds to a distinct cell track over the time window
- This block of code plots all specified cells over the window in a devoloved 2D setting for easy identification of ranges of interest

In [11]:
pltdata = []
for i in range(0,np.size(xDat,0)):
    # Create a trace
    trace = go.Scatter(
        name = 'Cell ' + repr(i),
        x = xDat[i,:],
        y = yDat[i,:],   
        )
    pltdata.append(trace)
data = [pltdata]

py.iplot(pltdata, filename='basic-line1')

The draw time for this plot will be slow for clients without much RAM.


### Range Restriction
- This block of code allows the user to specify a range in X and Y and pulls out the cells of interest in that area
- This is useful if one wishes to only extract and export one feature of interest for further processing

In [77]:
xRan = [540,575]
yRan = [330,370]
celL = findRange(xDat,yDat,xRan,yRan)
print(celL)

[4, 20, 21, 37, 40, 43, 48, 49, 52, 62, 65, 72, 74, 82, 95, 103, 107, 113, 117, 122, 135, 136, 146, 147, 150, 157, 159, 172, 183]


### Frequency Estimation
- The following block of code allows the user to plot the x and y positions respectivly across time
- This allows for the extraction of frequency information, etc. 

In [78]:
#X and Y temporal plots, frequency estimation, etc.
xred = xDat[celL,:]
yred = yDat[celL,:]
trL = np.multiply(xred,0)+1
for k in range(0,np.size(trL,1)):
    trL[:,k] = np.multiply(trL[:,k],k)

trL = np.multiply(trL,interval)

In [79]:
#plot sequentially
pltdata = []
for i in range(0,np.size(xred,0)):
    # Create a trace
    trace = go.Scatter(
        name = 'Cell ' + repr(i),
        x = trL[i,:],
        y = xred[i,:], 
        )
    pltdata.append(trace)
data = [pltdata]

py.iplot(pltdata, filename='XFREQ')


In [80]:
#plot Y frequency
pltdata = []
for i in range(0,np.size(xred,0)):
    # Create a trace
    trace = go.Scatter(
        name = 'Cell ' + repr(i),
        x = trL[i,:],
        y = yred[i,:], 
        )
    pltdata.append(trace)
data = [pltdata]

py.iplot(pltdata, filename='YFREQ')

### Exporting
- The following block of code exports the xyz position data for selected wells using a specified path
- Only exports data for specified cells (this is linked to the above block of code that allows the user to pull out the cells of interest in a specific location

In [62]:
pathnameS = "SelectedFeatures/Circuit1_w"+repr(currentwell)+".csv"
selExport(xDat,yDat,zDat,celL,pathnameS)

Feature Complete; File Saved!


# Breakup analysis for all points in one frame
- Allows user to look at the position of cells in only one time point
- Look specifically at plotting
- Then export to a csv for TDA in R

### Plotting in one frame in an interactive environment 
- Specify frame of interest

In [42]:
#define frame of interest
FOI = 100

plt_data = []
trace = go.Scatter3d(
            name = 'SyntheticSphere'+repr(i),
            x = xDat[:,FOI],
            y = yDat[:,FOI],
            z = zDat[:,FOI],
            mode = 'markers',
            marker = dict(
                color = 1,
                size = 8,
                symbol = 'circle',
                line = dict(
                    color = 'rgb(180, 180, 180)',
                    width = 1.0
                ),
                opacity = 0.4
            )
        )

plt_data.append(trace)

layout = go.Layout(margin = dict(l = 0, r = 0, b = 0, t = 0))

fig = go.Figure(data=plt_data, layout=layout)

iplot(fig)  


### Export
- Block export of the positions of all cells at only a single time point to CSV

In [43]:
pathnameF = "SelectedFeatures/TemBreak_w"+repr(currentwell)+"t"+repr(FOI)+".csv"
temExport(xDat,yDat,zDat,FOI,pathnameF) 

Feature Complete; Cluster SingTemp Pts File Saved!


## Export at Scale
- Export here is streamlined for an entire folder's worth of xls files
- Exports all cells at that particular timepoint

In [8]:
import glob
import os
readpath = glob.glob("Susan3DSet2/*.xls")
FrameS = 1
for path in readpath:
    print(path)
    filename = os.path.basename(path)
    wellname = filename.split("_")[0]
    #import data from csv
    xlsZ = pd.ExcelFile(path,engine='xlrd')
    dxx = pd.read_excel(xlsZ,'xStore')
    dyy = pd.read_excel(xlsZ,'yStore')
    dzz = pd.read_excel(xlsZ,'zStore')
    xDat = np.array(dxx)
    yDat = np.array(dyy)
    zDat = np.array(dzz)
    
    expPath = "Susan3DSet2/"+wellname+".csv"
    temExport(xDat,yDat,zDat,FrameS,expPath)

Susan3DSet2/B10_combineddata.xls
Feature Complete; Cluster SingTemp Pts File Saved!
Susan3DSet2/A07_combineddata.xls
Feature Complete; Cluster SingTemp Pts File Saved!
Susan3DSet2/A08_combineddata.xls
Feature Complete; Cluster SingTemp Pts File Saved!
Susan3DSet2/A12_combineddata.xls
Feature Complete; Cluster SingTemp Pts File Saved!
Susan3DSet2/B12_combineddata.xls
Feature Complete; Cluster SingTemp Pts File Saved!
Susan3DSet2/A09_combineddata.xls
Feature Complete; Cluster SingTemp Pts File Saved!
