In [None]:
import matplotlib
import matplotlib.pyplot as plt 
import numpy as np
import pandas as pd
import os
import umap
import datashader as ds
import colorcet as cc
import igraph
import tqdm
from sklearn.neighbors import NearestNeighbors

from dredFISH.Analysis import TissueGraph
from dredFISH.Analysis import Classification
from dredFISH.Visualization import Viz

from dredFISH.Utils.__init__plots import * 
from dredFISH.Utils import powerplots

import importlib
# Reload the three dredFISH modules imported above, presumably so that edits
# made to them on disk are picked up without restarting the kernel.
importlib.reload(Viz)
importlib.reload(Classification)
importlib.reload(TissueGraph)

#### Load data

In [None]:
# Directory that the plotting cells join with figure file names (e.g. 'basis_space.pdf').
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
respath = '/bigstore/GeneralStorage/fangming/projects/dredfish/figures/'

In [None]:
# Dataset directory; passed as `basepath` when the TissueMultiGraph is loaded,
# and reused as the output directory for the exported CSV.
basepth = '/bigstore/GeneralStorage/Data/dredFISH/Dataset1-t3'
# List the directory contents (oldest first) and peek at the start of TMG.json.
!ls -alhtr $basepth
!head $basepth"/TMG.json"

In [None]:
# Build the TissueMultiGraph object for the dataset directory.
# redo=False: per the original note, load what already exists rather than recompute.
TMG = TissueGraph.TissueMultiGraph(basepath=basepth, 
                                   redo=False, # load existing 
                                  )
# Display the object's repr as the cell output.
TMG

In [None]:
# First layer of the multigraph; everything below is derived from it.
layer = TMG.Layers[0]

# spatial coordinates
XY = layer.XY
###
# temporary hack: column 0 is used as y and column 1 as x (i.e. the axes are swapped)
y, x = XY[:, 0], XY[:, 1]
###

N = layer.N

# measured basis
ftrs_mat = layer.feature_mat

# UMAP: 2-D embedding of the feature matrix, seeded for reproducibility
umap_mat = umap.UMAP(
    n_neighbors=30,
    min_dist=0.1,
    random_state=0,
).fit_transform(ftrs_mat)

In [None]:
# One row per entry of the coordinate arrays: spatial position, UMAP position,
# and the first 24 feature columns (named b0 ... b23).
df = pd.DataFrame().assign(
    x=x,
    y=y,
    umap_x=umap_mat[:, 0],
    umap_y=umap_mat[:, 1],
    **{f'b{i}': ftrs_mat[:, i] for i in range(24)},
)
df

In [None]:
%%time
path = os.path.join(respath, 'basis_space.pdf')
P = powerplots.PlotScale(df['x'].max()-df['x'].min(), 
                         df['y'].max()-df['y'].min(),
                         # npxlx=300,
                         pxl_scale=20,
                        )
print(P.npxlx, P.npxly)

nx, ny = 6, 4
fig, axs = plt.subplots(ny, nx, figsize=(nx*6, ny*5))
for i in range(24):
    ax = axs.flat[i]
    aggdata = ds.Canvas(P.npxlx, P.npxly).points(df, 'x', 'y', agg=ds.mean(f'b{i}'))
    ax.imshow(aggdata, origin='lower', aspect='equal', cmap='coolwarm', vmin=-3, vmax=3, interpolation='none')
    ax.set_title(f'b{i}', loc='left', y=0.9)
    ax.axis('off')
fig.subplots_adjust(wspace=0.05, hspace=0)
# powerplots.savefig_autodate(fig, path)
plt.show()

In [None]:
%%time
x, y = 'umap_x', 'umap_y'
path = os.path.join(respath, 'basis_umap.pdf')
P = powerplots.PlotScale(df[x].max()-df[x].min(), 
                         df[y].max()-df[y].min(),
                         npxlx=300,
                        )
print(P.npxlx, P.npxly)

nx, ny = 6, 4
fig, axs = plt.subplots(ny, nx, figsize=(nx*5, ny*4))
for i in range(24):
    ax = axs.flat[i]
    aggdata = ds.Canvas(P.npxlx, P.npxly).points(df, x, y, agg=ds.mean(f'b{i}'))
    ax.imshow(aggdata, origin='lower', aspect='equal', cmap='coolwarm', vmin=-3, vmax=3, interpolation='none')
    ax.set_title(f'b{i}', loc='left', y=0.9)
    ax.axis('off')
fig.subplots_adjust(wspace=0.05, hspace=0.1)
# powerplots.savefig_autodate(fig, path)
plt.show()

# map to known cell types

In [None]:
%%time
# create known cell type classifier and train and predict
# The classifier is built on `layer` with a kNN model and three reference levels
# ('cluster_label' is left commented out).
allen_classifier = Classification.KnownCellTypeClassifier(
    layer, 
    tax_name='Allen_types',
    ref='allen_smrt_dpnmf',
    ref_levels=['class_label', 'neighborhood_label', 'subclass_label'], #, 'cluster_label'], 
    model='knn',
)
allen_classifier.train(verbose=True)
# presumably one column per entry of ref_levels (the next cell reads columns 0-2) — TODO confirm
type_mat = allen_classifier.classify()

# show the shape together with the values
type_mat.shape, type_mat

In [None]:
# add to df
for i in range(3):
    df[f'ktype_L{i+1}'] = type_mat[:,i]
df

In [None]:
# Show only the columns whose name starts with 'ktype'.
df.filter(regex=r'^ktype')

# Cell types (Leiden clustering)

In [None]:
import leidenalg as la
def leiden(G, cells,
           resolution=1, seed=0, n_iteration=2,
           **kwargs,
          ):
    """Run Leiden community detection and return one integer label per cell.

    Parameters
    ----------
    G : graph handed to ``la.find_partition``.
    cells : sized container; only ``len(cells)`` is used, to size the output.
        Cells are expected to be in the same order as the graph vertices.
    resolution : forwarded as ``resolution_parameter``.
    seed : forwarded as ``seed``.
    n_iteration : forwarded as ``n_iterations``.
    **kwargs : any further keyword arguments for ``la.find_partition``.

    Returns
    -------
    list of int
        ``labels[v]`` is the 1-based index of the community that contains
        vertex ``v``; positions that appear in no community keep the value 0.
    """
    communities = la.find_partition(
        G,
        la.RBConfigurationVertexPartition,  # modularity with resolution
        resolution_parameter=resolution,
        seed=seed,
        n_iterations=n_iteration,
        **kwargs,
    )
    # translate the partition (a sequence of vertex-index groups) into flat labels
    assignment = [0 for _ in range(len(cells))]
    for cluster_id, members in enumerate(communities, start=1):
        for vertex in members:
            assignment[vertex] = cluster_id
    return assignment



In [None]:
%%time
# feature graph to generete cell types
G = layer.FG
cells = layer.adata.obs.index.values
resolutions = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1,2,5,10]

for r in tqdm.tqdm(resolutions):
    types = leiden(G, cells, resolution=r)
    df[f'type_r{r}'] = np.char.add('t', np.array(types).astype(str))
df

In [None]:
%%time
hue = 'type_r1'
hue_order = np.sort(np.unique(df[hue]))
ntypes = len(hue_order)
        
fig, axs = plt.subplots(1, 2, figsize=(8*2,6))
fig.suptitle(f"{hue}; n={ntypes}")
ax = axs[0]
sns.scatterplot(data=df, x='x', y='y', 
                hue=hue, hue_order=hue_order, 
                s=0.5, edgecolor=None, 
                legend=False,
                ax=ax)
# ax.legend(loc='upper left', bbox_to_anchor=(0, -0.1), ncol=5)
ax.set_aspect('equal')
ax.axis('off')

ax = axs[1]
sns.scatterplot(data=df, x='umap_x', y='umap_y', 
                hue=hue, hue_order=hue_order, 
                s=0.5, edgecolor=None, 
                legend=False,
                ax=ax)
# ax.legend(loc='upper left', bbox_to_anchor=(0, -0.1), ncol=5)
ax.set_aspect('equal')
ax.axis('off')
fig.subplots_adjust(wspace=0)
plt.show()

In [None]:
# One figure per Leiden resolution, coloured by cell type:
# tissue space on the left, UMAP space on the right.
for r in resolutions:
    hue = f'type_r{r}'
    hue_order = np.sort(np.unique(df[hue]))
    ntypes = len(hue_order)

    fig, axs = plt.subplots(1, 2, figsize=(8*2,6))
    fig.suptitle(f"{hue}; n={ntypes}")
    # both panels share every scatter setting except the coordinate columns
    for ax, (xkey, ykey) in zip(axs, (('x', 'y'), ('umap_x', 'umap_y'))):
        sns.scatterplot(data=df, x=xkey, y=ykey, 
                        hue=hue, hue_order=hue_order, 
                        s=0.5, edgecolor=None, 
                        legend=False,
                        ax=ax)
        # ax.legend(loc='upper left', bbox_to_anchor=(0, -0.1), ncol=5)
        ax.set_aspect('equal')
        ax.axis('off')
    fig.subplots_adjust(wspace=0)
    plt.show()
    # release the figure explicitly so figures do not accumulate across iterations
    # on backends that do not close them on show()
    plt.close(fig)

# save df 

In [None]:
# Write the full table (with header row and index column) into the dataset directory.
df.to_csv(os.path.join(basepth, "analysis_dev_v2.csv"), header=True, index=True)

# Local env (Isozones) - old
- same cell type zones
- instead quantify the nearest neighbors of the same cell type

In [None]:
# call isozones
# compare the properties of isozone for each cell type
# Per-cell type labels used for the isozone analysis: the Leiden run at resolution 0.1.
hue = 'type_r0.1'
labels = df[hue].values
labels

In [None]:
# isozone
EL = np.asarray(layer.SG.get_edgelist())
EL_types = labels[EL]
EL_sametypes = EL[EL_types[:,0] == EL_types[:,1]]

IsoZoneG = igraph.Graph(n=N, edges=EL_sametypes, directed=False)
IsoZOneG = IsoZoneG.as_undirected().simplify() # what this means?
cmp = IsoZoneG.components()
Ix = np.asarray(cmp.membership)

isozones = Ix

In [None]:
# Display the per-cell type labels next to the per-cell isozone ids.
labels, isozones

In [None]:
# Size of every isozone (number of vertices sharing a component id),
# plotted in ascending order on a log10 scale.
# Ix.shape
unq, cnts = np.unique(Ix, return_counts=True)
plt.plot(np.log10(np.sort(cnts)))
# axis labels so the figure can be read on its own
plt.xlabel('isozone rank (ascending size)')
plt.ylabel('log10(isozone size)')
plt.show()

In [None]:

# For each cell type: cumulative fraction of that type's cells that sit in isozones
# of at most a given size (x axis: log10 isozone size).
uniqlabels = np.unique(labels)  # np.unique already returns sorted values
for i in uniqlabels:
    subIso = isozones[labels==i]
    unq, cnts = np.unique(subIso, return_counts=True)
    sizes = np.sort(cnts)  # isozone sizes for this type, ascending
    # alternative: CDF over the number of isozones instead of the number of cells
    # plt.plot(np.log10(sizes), np.linspace(0, 1, len(cnts)), label=i)
    _x = np.cumsum(sizes)  # cells accumulated from the smallest isozone upwards
    plt.plot(np.log10(sizes), _x/_x[-1], label=i)
    # plt.plot(sizes, np.linspace(0, 1, len(cnts)), label=i)
plt.legend()
plt.xlabel('log10(isozone size)')
# the curve is cumsum(sizes)/total, i.e. a CDF over cells, not over isozones
plt.ylabel('CDF - fraction of cells')
plt.show()