In [73]:
import numpy as np
import pandas as pd
from functools import partial
from scipy.sparse import csr_matrix
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools
# Initialise plotly's offline notebook mode before any iplot() call below.
init_notebook_mode(connected = True)

# Importing Collman14v2 Gaussian Weighted Data

The Collman14v2 Gaussian Weighted Data consisted of a csv file with centroid locations, F0 values for each centroid, and cluster labels. The F0 values represent the integrated sum intensity over an 11 x 11 x 11 cube surrounding each centroid. The F0 values were clustered using Hierarchical GMM, producing seven individual cluster labels. 

In [6]:
# Load the XYZ location of every Collman14v2 centroid from its csv file into a pandas data frame.

centroid_location_dataframe = pd.read_csv(
    filepath_or_buffer='collman14v2_gaussianWeighted_xyzLocations.csv'
)

In [7]:
# Extract centroid locations from the pandas data frame as a NumPy matrix.
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in pandas 1.0;
# the .values attribute returns the same ndarray and exists on every pandas version.

centroid_location_matrix = centroid_location_dataframe.values

In [8]:
# Load the cluster label assigned to every centroid from its csv file into a pandas data frame.

feature_label_dataframe = pd.read_csv(
    filepath_or_buffer='collman14v2_gaussianWeighted_ClusterLabels.csv'
)

In [9]:
# Extract feature labels from the pandas data frame as a NumPy matrix.
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in pandas 1.0;
# the .values attribute returns the same ndarray and exists on every pandas version.

feature_label_matrix = feature_label_dataframe.values

In [10]:
# Sanity check: one row per centroid and a single cluster-label column.
feature_label_matrix.shape

(1036, 1)

In [11]:
# Sanity check: the row count must match the label matrix, with three coordinate columns.
centroid_location_matrix.shape

(1036, 3)

In [12]:
# Append the cluster label as a fourth column next to the three coordinate columns of each centroid.

concatenated_matrix = np.concatenate(
    [centroid_location_matrix, feature_label_matrix],
    axis=1,
)

In [13]:
# Sanity check: three coordinate columns plus one label column per centroid.
concatenated_matrix.shape

(1036, 4)

In [14]:
# Reorder the rows so that centroids sharing a cluster label (column 3) sit next to each other.

sorted_cluster_matrix = concatenated_matrix[
    np.argsort(concatenated_matrix[:, 3])
]

In [15]:
# Cut the cluster-sorted matrix at every row where the label in column 3 changes,
# which yields one sub-array holding all of the centroids of each cluster.

new_cluster_matrix = np.split(
    sorted_cluster_matrix,
    np.flatnonzero(np.diff(sorted_cluster_matrix[:, 3])) + 1,
)

# XYZ Spatial Cluster Plots

In the XYZ Spatial Cluster Plots, each point represents the x-y coordinates of a centroid within a particular z-slice. Each of the points is colored according to the feature label (cluster) that it belongs to. The z-slices are numbered from 5 to 33 because only these z-slices contain fluorescently labeled synapses. 

In [16]:
# One empty bucket per z-slice 5..33, keyed by the slice number as a string.
z_slices = {str(slice_number): [] for slice_number in range(5, 34)}

In [17]:
# Group every centroid row by the z-slice it lies in.
# Empty the buckets first so that re-running this cell does not append each centroid a second time.
for bucket in z_slices.values():
    del bucket[:]

for elem in concatenated_matrix:
    # Cast to int before building the key: with a float-typed matrix str() would give '5.0',
    # which is not one of the keys of z_slices.
    my_z = int(elem[2])
    z_slices[str(my_z)].append(elem)

In [18]:
def make_trace(i, point_set):
    """Build a 2-D marker trace for the points of one cluster.

    i is the zero-based cluster index (the legend entry shows i + 1);
    point_set is a sequence of points whose first two items are x and y.
    """
    xs = [pt[0] for pt in point_set]
    ys = [pt[1] for pt in point_set]
    return go.Scatter(x=xs, y=ys, mode='markers', name='Cluster {}'.format(i + 1))

# Draw one 2-D scatter plot per z-slice, with the centroids of each of the seven clusters as a separate trace.
# Slices are visited in ascending numeric order; plain dict iteration order is not guaranteed before Python 3.7.
for z, points in sorted(z_slices.items(), key=lambda item: int(item[0])):
    point_sets = [[] for _ in range(7)]
    for point in points:
        # Labels are 1-based; int() keeps the list index valid even when the matrix dtype is float.
        point_sets[int(point[3]) - 1].append([point[0], point[1]])
    traces = [make_trace(i, point_sets[i]) for i in range(7)]
    layout = go.Layout(
        title='z = ' + str(z),
        xaxis=dict(title='x'),
        yaxis=dict(title='y'),
    )
    figure = go.Figure(data=traces, layout=layout)
    iplot(figure)

# XYZ Spatial Cluster Plot in Altered Aspect Ratio 

The XYZ Spatial Cluster Plot in Altered Aspect Ratio is a three-dimensional plot of all of the fluorescently labeled centroids in the Collman14v2 data set, drawn in an altered aspect ratio. Each of the centroids is colored according to the feature label (cluster) that it belongs to. This plot is considered to be in an 'altered aspect ratio' because the aspect ratio is inconsistent with the actual size of the tissue section that was used to obtain and label each of the centroids. 

In [53]:
def scatter(data, i):
    """Return a 3-D marker trace for the centroids of one cluster.

    data is a 2-D array whose columns 0, 1 and 2 are used as x, y and z;
    i is the number shown in the legend entry ('Cluster <i>').
    """
    return go.Scatter3d(
        x=data[:, 0],
        y=data[:, 1],
        z=data[:, 2],
        mode='markers',
        # marker.size must be a number; the string '9' fails plotly's property validation.
        marker=dict(size=9, showscale=False),
        name='Cluster {}'.format(i),
    )

# One 3-D trace per cluster; the legend numbering starts at 1.
trace = [
    scatter(cluster_points, cluster_number)
    for cluster_number, cluster_points in enumerate(new_cluster_matrix, start=1)
]

# Fixed 800 x 800 canvas; no scene aspect ratio is set for this figure.
layout = go.Layout(
    showlegend=True,
    width=800,
    height=800,
    title='XYZ Spatial Location of Centroids in Altered Aspect Ratio',
)

data = trace
fig = go.Figure(data=data, layout=layout)
iplot(fig)

# XYZ Spatial Cluster Plot in Native Aspect Ratio

The XYZ Spatial Cluster Plot in Native Aspect Ratio is a plot of all of the fluorescently labeled centroids in the Collman14v2 data set in three dimensions in their native aspect ratio. Each of the centroids is colored according to the feature label (cluster) that it belongs to. This plot is created using the native aspect ratio, which represents the actual size of the tissue section used to obtain and label the centroids. The size of the tissue section, and thus the native aspect ratio, is as follows: x = 3.72, y = 3.72, z = 70. 

In [54]:
def scatter(data, i):
    """Return a 3-D marker trace for the centroids of one cluster.

    data is a 2-D array whose columns 0, 1 and 2 are used as x, y and z;
    i is the number shown in the legend entry ('Cluster <i>').
    """
    return go.Scatter3d(
        x=data[:, 0],
        y=data[:, 1],
        z=data[:, 2],
        mode='markers',
        # marker.size must be a number; the string '9' fails plotly's property validation.
        marker=dict(size=9, showscale=False),
        name='Cluster {}'.format(i),
    )

# One 3-D trace per cluster; the legend numbering starts at 1.
trace = [
    scatter(cluster_points, cluster_number)
    for cluster_number, cluster_points in enumerate(new_cluster_matrix, start=1)
]

# Manual scene aspect ratio taken from the tissue-section dimensions quoted in the text above.
layout = go.Layout(
    showlegend=True,
    scene=dict(
        aspectratio=dict(x=3.72, y=3.72, z=70),
        aspectmode='manual',
    ),
    title='XYZ Spatial Location of Centroids in Native Aspect Ratio',
)

data = trace
fig = go.Figure(data=data, layout=layout)
iplot(fig)

# Histograms - Relative Frequency of Clusters along X, Y, and Z Axes 

The histograms plotted below describe the distribution of each cluster's centroids along the x, y, and z axes. Relative frequency is the number of centroids from a cluster that fall within each bin divided by the total number of centroids in that cluster. 

In [21]:
from plotly import tools

def histogram_z(data, i, column=0, start=0, end=10000, size=1000):
    """Return a relative-frequency histogram trace for one cluster.

    Despite its name, this definition bins column 0 (the x coordinate) by default.

    data   -- 2-D array holding the centroids of one cluster
    i      -- number shown in the legend entry ('Cluster <i>')
    column -- index of the column of data to bin
    start, end, size -- bin range and bin width handed to plotly's xbins
    """
    return go.Histogram(
        x=data[:, column],
        histnorm='probability',
        name='Cluster {}'.format(i),
        xbins=dict(start=start, end=end, size=size),
    )

# One histogram per cluster, laid out side by side on a shared y axis.
traces = [histogram_z(data, i+1) for i, data in enumerate(new_cluster_matrix)]
fig = tools.make_subplots(rows = 1, cols = 7, shared_yaxes = True)
for i, trace in enumerate(traces):
    fig.append_trace(trace, 1, i+1)

# The layout does not depend on the individual trace, so it is set once instead of on every loop iteration.
fig['layout'].update(
    yaxis = dict(title = 'Relative Frequencies'),
    title = 'Cluster Distribution over X Axis'
)

iplot(fig)

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y1 ]  [ (1,3) x3,y1 ]  [ (1,4) x4,y1 ]  [ (1,5) x5,y1 ]  [ (1,6) x6,y1 ]  [ (1,7) x7,y1 ]



In [22]:
from plotly import tools

def histogram_z(data, i, column=1, start=0, end=7000, size=700):
    """Return a relative-frequency histogram trace for one cluster.

    Despite its name, this definition bins column 1 (the y coordinate) by default.

    data   -- 2-D array holding the centroids of one cluster
    i      -- number shown in the legend entry ('Cluster <i>')
    column -- index of the column of data to bin
    start, end, size -- bin range and bin width handed to plotly's xbins
    """
    return go.Histogram(
        x=data[:, column],
        histnorm='probability',
        name='Cluster {}'.format(i),
        xbins=dict(start=start, end=end, size=size),
    )

# One histogram per cluster, laid out side by side on a shared y axis.
traces = [histogram_z(data, i+1) for i, data in enumerate(new_cluster_matrix)]
fig = tools.make_subplots(rows = 1, cols = 7, shared_yaxes = True)
for i, trace in enumerate(traces):
    fig.append_trace(trace, 1, i+1)

# The layout does not depend on the individual trace, so it is set once instead of on every loop iteration.
fig['layout'].update(
    yaxis = dict(title = 'Relative Frequencies'),
    title = 'Cluster Distribution over Y Axis'
)

iplot(fig)

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y1 ]  [ (1,3) x3,y1 ]  [ (1,4) x4,y1 ]  [ (1,5) x5,y1 ]  [ (1,6) x6,y1 ]  [ (1,7) x7,y1 ]



In [23]:
from plotly import tools

def histogram_z(data, i, column=2, start=0, end=33, size=3):
    """Return a relative-frequency histogram trace for one cluster.

    By default this definition bins column 2 (the z coordinate).

    data   -- 2-D array holding the centroids of one cluster
    i      -- number shown in the legend entry ('Cluster <i>')
    column -- index of the column of data to bin
    start, end, size -- bin range and bin width handed to plotly's xbins
    """
    # NOTE(review): the z-slices are described as running from 5 to 33; confirm that
    # centroids at z == 33 still land inside the last bin when end is 33, or pass a larger end.
    return go.Histogram(
        x=data[:, column],
        histnorm='probability',
        name='Cluster {}'.format(i),
        xbins=dict(start=start, end=end, size=size),
    )

# One histogram per cluster, laid out side by side on a shared y axis.
traces = [histogram_z(data, i+1) for i, data in enumerate(new_cluster_matrix)]
fig = tools.make_subplots(rows = 1, cols = 7, shared_yaxes = True)
for i, trace in enumerate(traces):
    fig.append_trace(trace, 1, i+1)

# The layout does not depend on the individual trace, so it is set once instead of on every loop iteration.
fig['layout'].update(
    yaxis = dict(title = 'Relative Frequencies'),
    title = 'Cluster Distribution over Z Axis'
)

iplot(fig)

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y1 ]  [ (1,3) x3,y1 ]  [ (1,4) x4,y1 ]  [ (1,5) x5,y1 ]  [ (1,6) x6,y1 ]  [ (1,7) x7,y1 ]



# Five Nearest Neighbors Plot 

The Five Nearest Neighbors Plot plots the five nearest centroids to each F0 cluster mean value. In the background are plotted all of the fluorescently labeled centroids in the Collman14v2 data set in three dimensions in an altered aspect ratio.

In [28]:
# Load the five nearest neighbours of each cluster from the Excel sheet and convert them to a NumPy matrix.
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in pandas 1.0;
# the .values attribute returns the same ndarray and exists on every pandas version.
nearest_neighbors_dataframe = pd.read_excel('Five_Nearest_Neighbors_to_Clusters.xlsx')
nearest_neighbors_matrix = nearest_neighbors_dataframe.values

In [63]:
# Order the nearest-neighbour rows by the cluster label held in column 3.
nearest_neighbors_sorted = nearest_neighbors_matrix[
    np.argsort(nearest_neighbors_matrix[:, 3])
]

In [68]:
# Split into one sub-array per cluster. The cut points are the rows where the label changes, which
# only delimit whole clusters when the rows are ordered by label, so the sorted matrix is split here.
nearest_neighbors_matrix_split = np.split(
    nearest_neighbors_sorted,
    np.where(np.diff(nearest_neighbors_sorted[:, 3]))[0] + 1,
)

In [64]:
# Reinterpret each matrix through a structured dtype with one unnamed field per column,
# so that np.setdiff1d below compares complete rows rather than individual numbers.
# NOTE(review): this relies on both matrices having the same dtype and a contiguous row layout - confirm.
A_rows = sorted_cluster_matrix.view([('', sorted_cluster_matrix.dtype)] * sorted_cluster_matrix.shape[1])
B_rows = nearest_neighbors_sorted.view([('', nearest_neighbors_sorted.dtype)] * nearest_neighbors_sorted.shape[1])

# Centroid rows that are not present among the nearest neighbours, viewed back as a plain 2-D array.
non_nearest_neighbors = np.setdiff1d(A_rows, B_rows).view(sorted_cluster_matrix.dtype).reshape(-1, sorted_cluster_matrix.shape[1])

In [65]:
# Sort the remaining (background) centroids by cluster label, then cut them into one sub-array per cluster.
non_n_sorted = non_nearest_neighbors[np.argsort(non_nearest_neighbors[:, 3])]
non_n_sort_split = np.array_split(
    non_n_sorted,
    np.flatnonzero(np.diff(non_n_sorted[:, 3])) + 1,
)

In [72]:
# Foreground traces: one per group of nearest neighbours, numbered from 1 in the legend.
trace1 = [
    scatter(neighbors, group_number)
    for group_number, neighbors in enumerate(nearest_neighbors_matrix_split, start=1)
]

def scatterbackground(data, i):
    """Return a small-marker 3-D trace used for the background centroids of one cluster.

    data is a 2-D array whose columns 0, 1 and 2 are used as x, y and z;
    i is the number shown in the legend entry ('Cluster <i>').
    """
    return go.Scatter3d(
        x=data[:, 0],
        y=data[:, 1],
        z=data[:, 2],
        mode='markers',
        showlegend=True,
        # marker.size must be a number; the string '3' fails plotly's property validation.
        marker=dict(size=3, showscale=False),
        name='Cluster {}'.format(i),
    )

# Background traces: every centroid that is not a nearest neighbour, one trace per cluster.
trace2 = [
    scatterbackground(background_points, cluster_number)
    for cluster_number, background_points in enumerate(non_n_sort_split, start=1)
]

# The nearest-neighbour traces are listed first, followed by the background traces.
trace_plotted = trace1 + trace2
data = trace_plotted

layout = go.Layout(
    showlegend=True,
    width=1000,
    height=1000,
    hovermode='closest',
    title='Five Nearest Neighbors to Each F0 Cluster Mean',
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)