# Generating XYZ plots of Gaussian Weighted Collman14v2
The synapses (centroids) were clustered using F0 values, i.e. the integrated sum of intensity over an 11x11x11 cube.


In [448]:
import numpy as np
import pandas as pd
from functools import partial

from scipy.sparse import csr_matrix


from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)




The chunk of code below first reads each file into a dataframe and then converts it into an array. 
The files used here were in 1-to-1 correspondence across rows, allowing the arrays to be sorted and concatenated.

In [449]:
# Read the centroid x/y/z location table from CSV into a DataFrame.
centroid_location_dataframe = pd.read_csv('collman14v2_gaussianWeighted_xyzLocations.csv')

In [450]:
# Convert the location table to a 2-D NumPy array.
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values yields the same array on both old and current pandas.
centroid_location_matrix = centroid_location_dataframe.values

In [451]:
# Read the cluster-label table from CSV into a DataFrame.
feature_label_dataframe = pd.read_csv('collman14v2_gaussianWeighted_ClusterLabels.csv')

In [452]:
# Convert the label table to a 2-D NumPy array.
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values yields the same array on both old and current pandas.
feature_label_matrix = feature_label_dataframe.values


In [453]:

# Load the F0 feature values from CSV and convert them to a NumPy array.
# .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
feature_value_dataframe = pd.read_csv('collman14v2_gaussianWeighted_F0.csv')
feature_value_matrix = feature_value_dataframe.values


In [454]:
# Load the nearest-neighbour table from the Excel workbook as a NumPy array.
# .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
nearest_neighbors_dataframe = pd.read_excel('Five_Nearest Neighbors_to Clusters .xlsx')
nearest_neighbors_matrix = nearest_neighbors_dataframe.values
# Order the rows by column 3, then cut the sorted array wherever column 3
# changes value so that each sub-array holds rows sharing one value.
# NOTE(review): column 3 is presumably the cluster label, as in
# combined_matrix -- confirm against the workbook layout.
nearest_sorted = nearest_neighbors_matrix[nearest_neighbors_matrix[:,3].argsort()]
near_neigh_sorted = np.array_split(nearest_sorted, np.where(np.diff(nearest_sorted[:,3]))[0]+1)


In [455]:
# Join locations and labels column-wise: the location columns come first,
# followed by the label column(s), one row per centroid.
combined_matrix = np.hstack([centroid_location_matrix, feature_label_matrix])

In [456]:
# Order the rows by column 2 (the z coordinate) ...
z_sorted = combined_matrix[np.argsort(combined_matrix[:, 2])]

# ... then cut the sorted array at every index where z changes, so each
# sub-array contains the rows of a single z value.
z_split = np.array_split(z_sorted, np.nonzero(np.diff(z_sorted[:, 2]))[0] + 1)





In [457]:
# Order the rows by column 3 (the cluster label) ...
cluster_sorted = combined_matrix[np.argsort(combined_matrix[:, 3])]

# ... then cut the sorted array at every index where the label changes, so
# each sub-array contains the rows of a single cluster.
cluster_sorted_split = np.array_split(cluster_sorted, np.nonzero(np.diff(cluster_sorted[:, 3]))[0] + 1)


## Plot 1: XYZ Spatial plot
The centroids are plotted on the x-y plane corresponding to a specific z-value, and are coloured according to the cluster they belong to.  


In [458]:
# Spatial cluster plots XYZ: one empty bucket per z slice, keyed by the
# slice number as a string ('5' through '33').
z_slices = {str(slice_number): [] for slice_number in range(5, 34)}


 


In [459]:
# Bucket every centroid row by its z value (column 2), keyed by str(z).
# setdefault() creates a bucket on demand, so a z value outside the
# pre-seeded 5-33 range no longer aborts the loop with a KeyError.
for elem in combined_matrix:
    my_z = elem[2]
    z_slices.setdefault(str(my_z), []).append(elem)

In [460]:
def make_trace(i, point_set):
    """Build a 2-D marker trace for one cluster.

    i is the zero-based cluster index (the legend entry shows i + 1);
    point_set is a sequence of points whose items 0 and 1 are x and y.
    """
    x_values = [point[0] for point in point_set]
    y_values = [point[1] for point in point_set]
    legend_label = 'Cluster {}'.format(i + 1)
    return go.Scatter(x=x_values, y=y_values, mode='markers', name=legend_label)

# Draw one figure per z slice, with one marker trace per cluster.
for z, points in z_slices.items():
    # Seven buckets, one per cluster; column 3 holds the label and the
    # bucket index is label - 1.
    point_sets = [[] for _ in range(7)]
    for point in points:
        point_sets[point[3] - 1].append([point[0], point[1]])
    traces = [make_trace(index, members) for index, members in enumerate(point_sets)]
    layout = go.Layout(
        title='Cluster distribution at z=' + str(z),
        xaxis=dict(title='x position'),
        yaxis=dict(title='y position'),
    )
    figure = go.Figure(data=traces, layout=layout)
    iplot(figure)

## Plot 2: Location of centroids in XYZ plane
The labelled centroids were plotted on the XYZ plane. Function scatter() expects data sorted into subarrays by cluster label. 

In [461]:
def scatter(data, i):
    """Return a 3-D marker trace for one group of centroids.

    data is a 2-D array whose columns 0, 1 and 2 are plotted as x, y and z;
    i is the number shown in the legend entry 'Cluster {i}'.
    """
    trace = go.Scatter3d(
        x=data[:, 0],
        y=data[:, 1],
        z=data[:, 2],
        mode='markers',
        marker=dict(
            # marker.size is a numeric property: the original string '9' is
            # rejected by property validation in plotly.py >= 3.
            size=9,
            showscale=False,
        ),
        name='Cluster {}'.format(i),
    )
    return trace
# One 3-D trace per cluster sub-array; legend numbering starts at 1.
trace = [scatter(data, label) for label, data in enumerate(cluster_sorted_split, 1)]

layout = go.Layout(
    title='XYZ spatial location of centroids clustered according to F0 values',
    hovermode='closest',
    height=1000,
    width=1000,
    showlegend=True,
)
data = trace
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Plot 3: Labelled centroids plotted spatially in native aspect ratio 
The plot is similar to the one above, except that the layout has been modified to match the native aspect ratio, i.e. the actual size of the tissue section: x = 3.72 nm, y = 3.72 nm, z = 70.0 nm.

In [442]:
def scatter(data, i):
    """Return a 3-D marker trace for one group of centroids.

    data is a 2-D array whose columns 0, 1 and 2 are plotted as x, y and z;
    i is the number shown in the legend entry 'Cluster {i}'.
    """
    trace = go.Scatter3d(
        x=data[:, 0],
        y=data[:, 1],
        z=data[:, 2],
        mode='markers',
        marker=dict(
            # marker.size is a numeric property: the original string '9' is
            # rejected by property validation in plotly.py >= 3.
            size=9,
            showscale=False,
        ),
        name='Cluster {}'.format(i),
    )
    return trace
# One 3-D trace per cluster sub-array; legend numbering starts at 1.
trace = [scatter(data, label) for label, data in enumerate(cluster_sorted_split, 1)]

layout = go.Layout(
    title='Altered aspect ratio of XYZ spatial location of centroids',
    # Fix the scene's aspect ratio by hand so that it matches the tissue slice.
    scene=dict(
        aspectmode='manual',
        aspectratio=dict(x=3.72, y=3.72, z=70),
    ),
    hovermode='closest',
    showlegend=True,
)
data = trace
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Plots 4-6: Distribution of the labelled centroids across the x, y and z axes
Histograms show the distribution of the labelled centroids. The bin sizes were not fixed for these plots. The functions histogram_x(), histogram_y() and histogram_z() expect data sorted into subarrays by cluster label.

In [443]:

from plotly import tools
def histogram_x(data, i):
    """Return a probability-normalised histogram trace of column 0 (x).

    data is a 2-D array of centroid rows; i is the number shown in the
    legend entry 'Cluster {i}'.
    """
    x_positions = data[:, 0]
    legend_label = 'Cluster {}'.format(i)
    return go.Histogram(x=x_positions, histnorm='probability', name=legend_label)
# One x-position histogram per cluster, side by side in a 1 x 7 grid that
# shares a single y axis.
traces = [histogram_x(data, i+1) for i, data in enumerate(cluster_sorted_split)]
fig = tools.make_subplots(rows = 1, cols = 7, shared_yaxes=True)
for i, trace in enumerate(traces):
    fig.append_trace(trace, 1, i+1)  # row 1, column i+1 (grid is 1-based)
# The title and y-axis label do not depend on the trace, so they are set
# once here rather than on every loop iteration.
fig['layout'].update(title = 'Cluster distribution over x-axis', yaxis=dict(
    title = 'Relative Frequencies'),
    )
iplot(fig)

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y1 ]  [ (1,3) x3,y1 ]  [ (1,4) x4,y1 ]  [ (1,5) x5,y1 ]  [ (1,6) x6,y1 ]  [ (1,7) x7,y1 ]



In [444]:
#plot of y-axis 
from plotly import tools
def histogram_y(data, i):
    """Return a probability-normalised histogram trace of column 1 (y).

    data is a 2-D array of centroid rows; i is the number shown in the
    legend entry 'Cluster {i}'.
    """
    y_positions = data[:, 1]
    legend_label = 'Cluster {}'.format(i)
    return go.Histogram(x=y_positions, histnorm='probability', name=legend_label)
# One y-position histogram per cluster, side by side in a 1 x 7 grid that
# shares a single y axis.
traces = [histogram_y(data, i+1) for i, data in enumerate(cluster_sorted_split)]
fig = tools.make_subplots(rows = 1, cols = 7, shared_yaxes=True)
for i, trace in enumerate(traces):
    fig.append_trace(trace, 1, i+1)  # row 1, column i+1 (grid is 1-based)
# The title and y-axis label do not depend on the trace, so they are set
# once here rather than on every loop iteration.
fig['layout'].update(title='Cluster distribution over y-axis', yaxis=dict(
    title = 'Relative Frequencies'),
    )
iplot(fig)

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y1 ]  [ (1,3) x3,y1 ]  [ (1,4) x4,y1 ]  [ (1,5) x5,y1 ]  [ (1,6) x6,y1 ]  [ (1,7) x7,y1 ]



In [445]:
#Plot for the z-axis 
from plotly import tools
def histogram_z(data, i):
    """Return a probability-normalised histogram trace of column 2 (z).

    data is a 2-D array of centroid rows; i is the number shown in the
    legend entry 'Cluster {i}'.
    """
    z_positions = data[:, 2]
    legend_label = 'Cluster {}'.format(i)
    return go.Histogram(x=z_positions, histnorm='probability', name=legend_label)
# One z-position histogram per cluster, side by side in a 1 x 7 grid that
# shares a single y axis.
traces = [histogram_z(data, i+1) for i, data in enumerate(cluster_sorted_split)]
fig = tools.make_subplots(rows = 1, cols = 7, shared_yaxes=True)
for i, trace in enumerate(traces):
    fig.append_trace(trace, 1, i+1)  # row 1, column i+1 (grid is 1-based)
# The title and y-axis label do not depend on the trace, so they are set
# once here rather than on every loop iteration.
fig['layout'].update(title = 'Cluster distribution over z-axis', yaxis=dict(
    title = 'Relative Frequencies'),
    )
iplot(fig)

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y1 ]  [ (1,3) x3,y1 ]  [ (1,4) x4,y1 ]  [ (1,5) x5,y1 ]  [ (1,6) x6,y1 ]  [ (1,7) x7,y1 ]



## Plot 7: 5 nearest neighbours to the F0 mean and non-nearest neighbours plotted spatially 
The nearest neighbours are highlighted with larger marker size, thus, only the marker size was altered in the scatterbackground() function to make non-nearest neighbours' markers smaller.

In [446]:
# Build an array of the rows of cluster_sorted that do not occur in
# nearest_sorted, i.e. the locations and cluster labels of the non-nearest
# neighbours.
# Each row is reinterpreted as a single structured record (one unnamed field
# per column) so that np.setdiff1d compares whole rows instead of individual
# values.
# NOTE(review): this presumably requires both arrays to have the same dtype
# and number of columns -- confirm against the contents of the Excel file.
A_rows = cluster_sorted.view([('', cluster_sorted.dtype)] * cluster_sorted.shape[1])
B_rows = nearest_sorted.view([('', nearest_sorted.dtype)] * nearest_sorted.shape[1])

# setdiff1d returns the unique records of A_rows that are absent from B_rows;
# view() + reshape() turn them back into a plain 2-D array with the original
# column count.
non_nearest_neigh = np.setdiff1d(A_rows, B_rows).view(cluster_sorted.dtype).reshape(-1, cluster_sorted.shape[1])

# Sort by column 3 (cluster label) and split wherever the label changes, so
# each sub-array holds one cluster and can be passed to scatterbackground(),
# which is defined in the next cell.
non_n_sorted = non_nearest_neigh[non_nearest_neigh[:,3].argsort()]
non_n_sort_split = np.array_split(non_n_sorted, np.where(np.diff(non_n_sorted[:,3]))[0]+1)


In [447]:
# Nearest-neighbour plot, part 1: one scatter() trace per sub-array of
# near_neigh_sorted (the 5 nearest neighbours), numbered from 1 in the legend.
trace1 = [scatter(data, label) for label, data in enumerate(near_neigh_sorted, 1)]

#now to add centroids that are not nearest neighbours, only difference in this plot is to make markers smaller:
def scatterbackground(data, i):
    """Return a small-marker 3-D trace for one group of background centroids.

    data is a 2-D array (a sub-array sorted by cluster label) whose columns
    0, 1 and 2 are plotted as x, y and z; i is the number used in the trace
    name 'Cluster {i}'.
    """
    trace = go.Scatter3d(
        x=data[:, 0],
        y=data[:, 1],
        z=data[:, 2],
        mode='markers',
        showlegend=False,  # don't want multiple legends
        marker=dict(
            # Smaller than the markers drawn by scatter(). marker.size is a
            # numeric property: the original string '3' is rejected by
            # property validation in plotly.py >= 3.
            size=3,
            showscale=False,
        ),
        name='Cluster {}'.format(i),
    )
    return trace
# Background traces: one per cluster sub-array of non-nearest neighbours,
# numbered from 1.
trace2 = [scatterbackground(data, label) for label, data in enumerate(non_n_sort_split, 1)]

# Nearest-neighbour traces first, then the background ones, joined into the
# single list handed to the figure.
trace_plotted = trace1 + trace2
data = trace_plotted

layout = go.Layout(
    title='Spatial location of 5 nearest neighbours to F0 mean',
    hovermode='closest',
    height=1000,
    width=1000,
    showlegend=True,
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)