In [None]:
import matplotlib
import matplotlib.pyplot as plt 
import numpy as np
import pandas as pd
import os
import umap
import datashader as ds
import colorcet as cc
import igraph
import tqdm
from scipy import sparse
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.cluster import KMeans
from scipy.spatial import Delaunay

from matplotlib.collections import PolyCollection
from matplotlib.colors import ListedColormap

from dredFISH.Analysis import TissueGraph
from dredFISH.Visualization import Viz
from dredFISH.Utils.__init__plots import * 
from dredFISH.Utils import powerplots
from dredFISH.Utils import miscu
from dredFISH.Utils import tmgu

import importlib
# Reload the two project modules so that edits made to them on disk take
# effect in the running kernel without restarting it.
importlib.reload(Viz)
importlib.reload(TissueGraph)

#### Load data

In [None]:
# Directory for figure output.
# NOTE(review): not referenced by any other cell visible in this notebook.
respath = '/bigstore/GeneralStorage/fangming/projects/dredfish/figures/'

In [None]:
# Root directory of the dataset; the CSV and the TissueMultiGraph below are read from here.
basepth = '/bigstore/GeneralStorage/Data/dredFISH/Dataset1-t5'
# Shell peeks: list the directory (oldest first) and show the head of TMG.json.
!ls -alhtr $basepth
!head $basepth"/TMG.json"

In [None]:
# Analysis table stored in the dataset directory; its first column becomes the index.
# Presumably one row per cell, in the same order as the graph nodes — TODO confirm.
analysis_csv = os.path.join(basepth, "default_analysis.csv")
df = pd.read_csv(analysis_csv, index_col=0)
df

In [None]:
# redo=False -> load the existing multigraph stored under basepth
TMG = TissueGraph.TissueMultiGraph(basepath=basepth, redo=False)

In [None]:
# First layer of the multigraph and its spatial coordinates
layer = TMG.Layers[0]
XY = layer.XY
# x is read from column 1 and y from column 0, i.e. the axes are swapped
# relative to XY on purpose (a temporary hack)
x, y = XY[:, 1], XY[:, 0]

# node identifiers and node count of this layer
cells = layer.adata.obs.index.values
N = layer.N

# measured basis
ftrs_mat = layer.feature_mat

# umap_mat = umap.UMAP(n_neighbors=30, min_dist=0.1).fit_transform(ftrs_mat)



In [None]:
# types
# Pick one type column of df and keep its labels as a plain NumPy array.
typecol = 'type_r0.1'
types = df[typecol].values
# local env
# env_mat = get_local_typeabundance(layer.SG, types)

In [None]:
# dd = Delaunay(XY)

In [None]:
# dgr = layer.SG.indegree() # same as out
# fig, ax = plt.subplots()
# sns.histplot(data=dgr, element='bars', bins=np.arange(0.5, 20), ax=ax)
# ax.set_xlabel('Number of Delaunay neighbors')
# ax.set_ylabel('Number of cells')
# plt.show()

# dgr = layer.SG.indegree() # same as out
# fig, ax = plt.subplots()
# sns.histplot(data=dgr, element='step', bins=np.arange(0.5, 20), ax=ax, cumulative=True)
# ax.set_xlabel('Number of Delaunay neighbors')
# ax.set_ylabel('Number of cells')
# plt.show()

In [None]:
# distance to DD neighbors?
# adj_mtx = SG.get_adjacency_sparse()

In [None]:
# Length of every edge of the layer's spatial graph, then the mean and the
# minimum edge length per node.
N = len(XY)
EL = np.asarray(layer.SG.get_edgelist())   # (n_edges, 2) node indices
edge_xy = XY[EL]                           # coordinates of both endpoints of each edge
dists = np.linalg.norm(edge_xy[:, 0, :] - edge_xy[:, 1, :], ord=2, axis=1)

# Each edge is listed once per endpoint so that it counts for both of its nodes.
tmp = pd.DataFrame({
    0: np.concatenate([EL[:, 0], EL[:, 1]]),   # node index
    2: np.concatenate([dists, dists]),         # edge length
})
per_node = tmp.groupby(0)[2]

# reindex -> one entry per node 0..N-1; nodes without any edge come out as NaN
meandists = per_node.mean().reindex(np.arange(N)).values
mindists = per_node.min().reindex(np.arange(N)).values
meandists, mindists

In [None]:
# # dgr = layer.SG.indegree() # same as out
# fig, ax = plt.subplots()
# sns.histplot(data=meandists, element='bars', 
#              bins=np.linspace(0,100,100), 
#              ax=ax)
# ax.set_xlabel('Mean Delaunay neighbor distance (um)')
# ax.set_ylabel('Number of cells')
# plt.show()

# dgr = layer.SG.indegree() # same as out
# fig, ax = plt.subplots()
# sns.histplot(data=meandists, element='step', 
#              bins=np.linspace(0,100,100), 
#              # bins=np.arange(0.5, 20), 
#              cumulative=True,
#              ax=ax, 
#              )
# ax.set_xlabel('Mean Delaunay neighbor distance (um)')
# ax.set_ylabel('Number of cells')
# plt.show()

In [None]:
# Re-select the type labels. Unlike the earlier cell, `types` is kept as a
# pandas Series here (no .values); it is used below as a boolean-mask source.
typecol = 'type_r0.1'
types = df[typecol]
unq_types = np.unique(types)

In [None]:
# by cell type: one figure for the mean neighbor distance, one for the neighbor count
unq_types = np.unique(types)
colors = sns.color_palette("tab10", len(unq_types))
nk = np.array(layer.SG.indegree())

# (per-node values, histogram bin edges, x-axis label) for each figure
panels = [
    (meandists, np.linspace(0,100,100), 'Mean Delaunay neighbor distance (um)'),
    (nk, np.arange(0.5, 21), 'Num. Delaunay neighbor'),
]
for values, bin_edges, xlabel in panels:
    fig, ax = plt.subplots()
    for color, t in zip(colors, unq_types):
        sns.histplot(
            data=values[types==t],
            element='step',
            stat='probability',
            fill=False,
            label=t,
            bins=bin_edges,
            color=color,
            ax=ax,
        )
    ax.legend()
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Fraction of cells')
    plt.show()

In [None]:
# All columns of df whose name starts with 'type_' or 'ktype_'.
typecols = df.filter(regex='^type_|^ktype_', axis=1).columns
typecols

In [None]:
# Distribution of the number of graph neighbors per cell, split by type,
# with one panel per type column in `typecols`.
nk = np.array(layer.SG.indegree())

n = len(typecols)
nx = 3
ny = max(1, (n + nx - 1) // nx)   # ceil(n / nx), at least one row

# squeeze=False keeps `axs` two-dimensional even when there is a single row
fig, axs = plt.subplots(ny, nx, figsize=(6*nx, 4*ny), sharex=True, squeeze=False)
for j, (typecol, ax) in enumerate(zip(typecols, axs.flat)):
    types = df[typecol].values
    # by cell type
    unq_types = np.unique(types)
    colors = sns.color_palette("tab10", len(unq_types))

    for i, t in enumerate(unq_types):
        val = nk[types==t]
        sns.histplot(data=val, element='step', stat='probability',
                     fill=False,
                     label=t,
                     bins=np.arange(0.5, 21),
                     color=colors[i],
                     ax=ax)
    # a legend with 15 or more entries would bury the panel
    if len(unq_types) < 15:
        ax.legend(fontsize=10)

    ax.set_title(typecol)
    if j % nx == 0:
        # the histogram is over neighbor counts, not over distances
        ax.set_xlabel('Num. Delaunay neighbor')
        ax.set_ylabel('Fraction of cells')
    else:
        ax.set_xlabel('')
        ax.set_ylabel('')

# hide the panels that received no type column (index by n, not by the loop variable)
for ax in axs.flat[n:]:
    ax.axis('off')
plt.show()

In [None]:
# Distribution of the mean neighbor distance per cell, split by type,
# with one panel per type column in `typecols`.
nk = np.array(layer.SG.indegree())

n = len(typecols)
nx = 3
ny = (n + nx - 1) // nx

dist_bins = np.linspace(0,100,100)
fig, axs = plt.subplots(ny, nx, figsize=(6*nx, 4*ny), sharex=True)
for j, (typecol, ax) in enumerate(zip(typecols, axs.flat)):
    types = df[typecol].values
    # by cell type
    unq_types = np.unique(types)
    colors = sns.color_palette("tab10", len(unq_types))

    for color, t in zip(colors, unq_types):
        sns.histplot(
            data=meandists[types==t],
            element='step',
            stat='probability',
            fill=False,
            label=t,
            bins=dist_bins,
            color=color,
            ax=ax,
        )
    # legend only when it stays readable
    if len(unq_types) < 15:
        ax.legend(fontsize=10)

    ax.set_title(typecol)
    # axis labels on the first column only
    first_col = (j % nx == 0)
    ax.set_xlabel('Mean Delaunay neighbor distance (um)' if first_col else '')
    ax.set_ylabel('Fraction of cells' if first_col else '')

# switch off the panels left over after the last type column
for ax in axs.flat[j+1:]:
    ax.axis('off')
plt.show()

# Allen cell types only

In [None]:
# Series indexed by ktype_L3 label whose value is the ktype_L2 label found on
# the first row carrying that ktype_L3 label.
# NOTE(review): assumes every ktype_L3 label belongs to a single ktype_L2 label — confirm.
typemap = df[['ktype_L3', 'ktype_L2']].groupby('ktype_L3').first()['ktype_L2']
typemap

In [None]:
# Cumulative distribution of the mean neighbor distance: one panel per major
# (ktype_L2) type, all ktype_L3 types in gray and that panel's own ktype_L3
# types highlighted in color.
types = df['ktype_L3'].values
unq_types = np.sort(np.unique(types))

major_types = np.sort(np.unique(typemap))
nx = 4
# enough rows for every major type (a fixed 2x4 grid fails beyond 8 of them)
ny = max(1, int(np.ceil(len(major_types) / nx)))
# palette defined here rather than borrowed from whatever an earlier cell left in `colors`
palette = sns.color_palette('tab10', n_colors=10)
dist_bins = np.linspace(0,100,100)

fig, axs = plt.subplots(ny, nx, figsize=(nx*5, ny*4), sharex=True, sharey=True,
                        squeeze=False)
for j, major_type in enumerate(major_types):
    ax = axs.flat[j]
    minor_types = typemap[typemap==major_type].index
    ax.set_title(major_type)
    # background: every type, unlabeled so it stays out of the legend
    for t in unq_types:
        val = meandists[types==t]
        sns.histplot(data=val, element='step', stat='probability',
                     fill=False,
                     label='',
                     bins=dist_bins,
                     color='lightgray',
                     cumulative=True,
                     ax=ax)
    # foreground: the types that belong to this panel's major type
    for i, t in enumerate(minor_types):
        val = meandists[types==t]
        sns.histplot(data=val, element='step', stat='probability',
                     fill=False,
                     label=t,
                     bins=dist_bins,
                     color=palette[i % len(palette)],   # wrap around past 10 types
                     cumulative=True,
                     ax=ax)
    ax.legend(fontsize=12)
    sns.despine(ax=ax)
    if j % nx == 0:
        ax.set_xlabel('Mean Delaunay neighbor distance (um)')
        ax.set_ylabel('Fraction of cells')
    else:
        ax.set_xlabel('')
        ax.set_ylabel('')
# hide panels that have no major type
for ax in axs.flat[len(major_types):]:
    ax.axis('off')
fig.subplots_adjust(wspace=0.01)
plt.show()
        

In [None]:
# Histogram (non-cumulative) of the mean neighbor distance: one panel per major
# (ktype_L2) type, all ktype_L3 types in thin gray and that panel's own
# ktype_L3 types highlighted in thicker colored lines.
types = df['ktype_L3'].values
unq_types = np.sort(np.unique(types))

major_types = np.sort(np.unique(typemap))
nx = 4
# enough rows for every major type (a fixed 2x4 grid fails beyond 8 of them)
ny = max(1, int(np.ceil(len(major_types) / nx)))
# palette defined here rather than borrowed from whatever an earlier cell left in `colors`
palette = sns.color_palette('tab10', n_colors=10)
dist_bins = np.linspace(0,100,100)

fig, axs = plt.subplots(ny, nx, figsize=(nx*6, ny*5), sharex=True, sharey=True,
                        squeeze=False)
for j, major_type in enumerate(major_types):
    ax = axs.flat[j]
    minor_types = typemap[typemap==major_type].index
    ax.set_title(major_type)
    # background: every type, unlabeled so it stays out of the legend
    for t in unq_types:
        val = meandists[types==t]
        sns.histplot(data=val, element='step', stat='probability',
                     fill=False,
                     label='',
                     bins=dist_bins,
                     color='lightgray',
                     linewidth=1,
                     # cumulative=True,
                     ax=ax)
    # foreground: the types that belong to this panel's major type
    for i, t in enumerate(minor_types):
        val = meandists[types==t]
        sns.histplot(data=val, element='step', stat='probability',
                     fill=False,
                     label=t,
                     bins=dist_bins,
                     color=palette[i % len(palette)],   # wrap around past 10 types
                     linewidth=2,
                     # cumulative=True,
                     ax=ax)
    ax.legend(fontsize=12)
    sns.despine(ax=ax)
    if j % nx == 0:
        ax.set_xlabel('Mean Delaunay neighbor distance (um)')
        ax.set_ylabel('Fraction of cells')
    else:
        ax.set_xlabel('')
        ax.set_ylabel('')
# hide panels that have no major type
for ax in axs.flat[len(major_types):]:
    ax.axis('off')
fig.subplots_adjust(wspace=0.01)
plt.show()
        

# Poisson Process

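$$G(w)=P(W\le w)=1-e^{-\lambda\pi w^2}$$

where $W$ is the distance from a point to its nearest neighbor and $\lambda$ is the density of points per unit area.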

To estimate $\lambda$, fit this CDF to the empirical distribution of each cell type ($x$: distance, $y$: cumulative fraction of cells):
$$y=1-e^{-\lambda\pi x^2}$$
$$\lambda = \frac{-\log(1-y)}{\pi x^2}$$


In [None]:
def poisson_neighbor_cdf(x, lmd=1):
    """Evaluate ``1 - exp(-lmd * pi * x**2)`` element-wise.

    Parameters
    ----------
    x : scalar or array-like
        Distance(s) at which to evaluate the expression.
    lmd : scalar, optional
        The rate ``lambda`` in the exponent (default 1).

    Returns
    -------
    NumPy scalar or ndarray
        Value(s) with the same shape as ``x``.
    """
    exponent = -lmd*np.pi*np.power(x, 2)
    return 1 - np.exp(exponent)
    

In [None]:
# Shape of the model CDF for lmd=1 on 100 evenly spaced distances in [0, 5].
w = np.linspace(0,5,100)
g = poisson_neighbor_cdf(w, lmd=1)
plt.plot(w, g)
plt.xlabel('w')
plt.ylabel('G(w)')

# Mean dists

In [None]:
from sklearn.linear_model import LinearRegression

# Estimate the rate lambda of the model CDF  y = 1 - exp(-lambda * pi * x^2)
# for every ktype_L3 type, using the linearized form
#     -log(1 - y) = lambda * (pi * x^2).
# NOTE(review): the input is the *mean* neighbor distance per cell (`meandists`);
# if the model is meant for nearest-neighbor distances, `mindists` may be the
# intended input — confirm.
types = df['ktype_L3'].values
unq_types = np.sort(np.unique(types))
hist_dict = {}   # type -> density histogram over `bins`
cdf_dict = {}    # type -> empirical CDF evaluated at the left bin edges, bins[:-1]
lmd_hats = {}    # type -> pointwise lambda estimate at each bins[:-1] (NaN where undefined)
lmdp_hats = {}   # type -> single lambda from the linear fit (NaN when no fit is possible)
bins = np.linspace(0,100,100)
bin_widths = np.diff(bins)
n_bins = len(bins) - 1
# regressor of the linearized model; identical for every type
_x = np.pi*np.power(bins[:-1], 2)
for t in unq_types:
    val = meandists[types==t]
    counts, _ = np.histogram(val, bins=bins)
    total = counts.sum()
    if total == 0:
        # no value of this type falls inside the bins: nothing to estimate
        hist_dict[t] = np.full(n_bins, np.nan)
        cdf_dict[t] = np.full(n_bins, np.nan)
        lmd_hats[t] = np.full(n_bins, np.nan)
        lmdp_hats[t] = np.nan
        continue

    # same normalization as np.histogram(..., density=True)
    hist = counts/bin_widths/total
    hist_dict[t] = hist

    # Fraction of values strictly below each left edge, so cdf[i] pairs with
    # bins[:-1][i]. Summing the *density* instead would omit the bin width
    # and pair each cumulative value with the wrong edge.
    cdf = np.concatenate([[0.0], np.cumsum(counts)[:-1]])/total
    cdf_dict[t] = cdf

    # cdf == 1 gives +inf here; those saturated points carry no information
    with np.errstate(divide='ignore'):
        _y = -np.log1p(-cdf)

    # pointwise estimate, undefined at x == 0 and where the CDF has saturated
    usable = np.isfinite(_y) & (_x > 0)
    lmd_hat = np.full(n_bins, np.nan)
    lmd_hat[usable] = _y[usable]/_x[usable]
    lmd_hats[t] = lmd_hat

    # Linear fit on the finite points only.
    # NOTE(review): the model has no intercept, yet one is fitted (kept from
    # the original analysis) — confirm whether fit_intercept=False is intended.
    fit_mask = np.isfinite(_y)
    if fit_mask.sum() >= 2:
        LR = LinearRegression(fit_intercept=True)
        LR.fit(_x[fit_mask].reshape(-1,1), _y[fit_mask])
        lmdp_hats[t] = LR.coef_[0] # only 1 num
    else:
        lmdp_hats[t] = np.nan
    

In [None]:
# Ten-color tab10 palette used by the plots below; the trailing expression displays it.
colors = sns.color_palette('tab10', n_colors=10)
colors

In [None]:
# Empirical CDF (solid) against the fitted model CDF (dashed): one panel per
# major type, all types in gray and that panel's own types in color.
major_types = np.sort(np.unique(typemap))
nx = 4
# enough rows for every major type (a fixed 2x4 grid fails beyond 8 of them)
ny = max(1, int(np.ceil(len(major_types) / nx)))
fig, axs = plt.subplots(ny, nx, figsize=(nx*6, ny*5), sharex=True, sharey=True,
                        squeeze=False)
x_left = bins[:-1]
for j, major_type in enumerate(major_types):
    ax = axs.flat[j]
    minor_types = typemap[typemap==major_type].index
    ax.set_title(major_type)
    # background: every type, unlabeled so it stays out of the legend
    for t in unq_types:
        ax.plot(x_left, cdf_dict[t], label='', color='lightgray', linewidth=1)

    for i, t in enumerate(minor_types):
        color = colors[i % len(colors)]   # wrap around instead of running off the palette
        valfit = poisson_neighbor_cdf(x_left, lmd=lmdp_hats[t])
        ax.plot(x_left, cdf_dict[t], label=t, linewidth=2, color=color)
        # separate legend entry so data and fit can be told apart
        ax.plot(x_left, valfit, label=f'{t} (fit)', linewidth=2, linestyle='--', color=color)

    ax.legend(fontsize=12)
    sns.despine(ax=ax)
    if j % nx == 0:
        ax.set_xlabel('Mean Delaunay neighbor distance (um)')
        # the curves are CDFs, not lambda estimates
        ax.set_ylabel('Cumulative fraction of cells')
    else:
        ax.set_xlabel('')
        ax.set_ylabel('')
# hide panels that have no major type
for ax in axs.flat[len(major_types):]:
    ax.axis('off')
fig.subplots_adjust(wspace=0.01)
plt.show()
        

In [None]:
# Fitted lambda of every type drawn as a flat line over the distance axis:
# one panel per major type, all types in gray, the panel's own types labeled.
major_types = np.sort(np.unique(typemap))
nx = 4
ny = 2
fig, axs = plt.subplots(ny, nx, figsize=(nx*6, ny*5), sharex=True, sharey=True)
x_left = bins[:-1]
n_points = len(bins)-1
for j, major_type in enumerate(major_types):
    ax = axs.flat[j]
    ax.set_title(major_type)
    minor_types = typemap[typemap==major_type].index

    # background: every type
    for t in unq_types:
        ax.plot(x_left, [lmdp_hats[t]]*n_points, label='', color='lightgray', linewidth=1)

    # foreground: the types of this panel's major type
    for t in minor_types:
        ax.plot(x_left, [lmdp_hats[t]]*n_points, label=t, linewidth=2)

    ax.legend(fontsize=12)
    sns.despine(ax=ax)
    # axis labels on the first column only
    first_col = (j % nx == 0)
    ax.set_xlabel('Mean Delaunay neighbor distance (um)' if first_col else '')
    ax.set_ylabel('Lambda estimate' if first_col else '')
fig.subplots_adjust(wspace=0.01)
plt.show()
        

In [None]:
# Pointwise lambda estimates (lmd_hats) along the distance axis: one panel per
# major type, all types in gray, the panel's own types labeled.
major_types = np.sort(np.unique(typemap))
nx = 4
ny = 2
fig, axs = plt.subplots(ny, nx, figsize=(nx*6, ny*5), sharex=True, sharey=True)
x_left = bins[:-1]
for j, major_type in enumerate(major_types):
    ax = axs.flat[j]
    ax.set_title(major_type)
    minor_types = typemap[typemap==major_type].index

    # background: every type
    for t in unq_types:
        ax.plot(x_left, lmd_hats[t], label='', color='lightgray', linewidth=1)

    # foreground: the types of this panel's major type
    for t in minor_types:
        ax.plot(x_left, lmd_hats[t], label=t, linewidth=2)

    ax.legend(fontsize=12)
    sns.despine(ax=ax)
    # axis labels on the first column only
    first_col = (j % nx == 0)
    ax.set_xlabel('Mean Delaunay neighbor distance (um)' if first_col else '')
    ax.set_ylabel('Lambda estimate' if first_col else '')
fig.subplots_adjust(wspace=0.01)
plt.show()
        

In [None]:
# Density histogram divided by pi*x along the distance axis: one panel per
# major type, all types in gray, the panel's own types labeled.
major_types = np.sort(np.unique(typemap))
nx = 4
ny = 2
fig, axs = plt.subplots(ny, nx, figsize=(nx*6, ny*5), sharex=True, sharey=True)
x_left = bins[:-1]
# pi*x, floored at 1e-10 so the x = 0 bin does not divide by zero
denom = np.clip((np.pi*bins[:-1]), 1e-10, None)
for j, major_type in enumerate(major_types):
    ax = axs.flat[j]
    ax.set_title(major_type)
    minor_types = typemap[typemap==major_type].index

    # background: every type
    for t in unq_types:
        ax.plot(x_left, hist_dict[t]/denom, label='', color='lightgray', linewidth=1)

    # foreground: the types of this panel's major type
    for t in minor_types:
        ax.plot(x_left, hist_dict[t]/denom, label=t, linewidth=2)

    ax.legend(fontsize=12)
    sns.despine(ax=ax)
    # axis labels on the first column only
    first_col = (j % nx == 0)
    ax.set_xlabel('Mean Delaunay neighbor distance (um)' if first_col else '')
    ax.set_ylabel('Lambda estimate' if first_col else '')
fig.subplots_adjust(wspace=0.01)
plt.show()
        