In [None]:
import matplotlib
import matplotlib.pyplot as plt 
import numpy as np
import pandas as pd
import os
import umap
import datashader as ds
import colorcet as cc
import igraph
import tqdm
from scipy import sparse
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.cluster import KMeans

from dredFISH.Analysis import TissueGraph
from dredFISH.Visualization import Viz
# from dredFISH.Utils.__init__plots import * 
import seaborn as sns
from dredFISH.Utils import powerplots
from dredFISH.Utils import miscu
from dredFISH.Utils import tmgu

import importlib
# Reload the project modules so that source edits made after the kernel
# started take effect without a restart. The repr of the last reload is
# what this cell displays.
importlib.reload(Viz)
importlib.reload(TissueGraph)

In [None]:
# %%time
# # slow for 24 bits; fast for 2D
# k = 30 
# NN = NearestNeighbors(n_neighbors=k)
# NN.fit(XY)
# knn = NN.kneighbors(XY, return_distance=False)

# knn
# # use pynndescent


In [None]:
# %%time
# n_topics_list = [2,5,10]
# n_procs = 3 

# topic_cls = Classification.TopicClassifier(TMG.Layers[0])
# topic_cls.train(n_topics_list=n_topics_list, n_procs=n_procs)
# topics = topic_cls.classify(topic_cls.Env)

# Key lines
# n_topics = 10
# lda = LatentDirichletAllocation(n_components=n_topics)
# B = lda.fit(env)
# T = lda.transform(Env)

#### Load data

In [None]:
# Presumably the output directory for figures.
# NOTE(review): no other cell visible in this notebook references respath -- confirm it is still needed.
respath = '/bigstore/GeneralStorage/fangming/projects/dredfish/figures/'

In [None]:
# Dataset directory: default_analysis.csv and the TissueMultiGraph are both read from it below.
basepth = '/bigstore/GeneralStorage/Data/dredFISH/Dataset1-t5'
# Sanity check: list the directory (long format, most recently modified last) ...
!ls -alhtr $basepth
# ... and print the first lines of TMG.json.
!head $basepth"/TMG.json"

In [None]:
# Load the default analysis table; the first CSV column becomes the index.
analysis_csv = os.path.join(basepth, "default_analysis.csv")
df = pd.read_csv(analysis_csv, index_col=0)
df

In [None]:
# Show the first row as a Series: a quick look at the available columns and their values.
df.iloc[0]

In [None]:
# redo=False -> load the existing multigraph stored under basepth.
TMG = TissueGraph.TissueMultiGraph(basepath=basepth, redo=False)
TMG

In [None]:
# Pull coordinates, cell ids and features out of the first layer of the multigraph.
layer = TMG.Layers[0]

# spatial coordinates
XY = layer.XY
### a temporary hack: the two coordinate columns are swapped,
### so x comes from column 1 and y from column 0
y, x = XY[:, 0], XY[:, 1]
###

cells = layer.adata.obs.index.values

N = layer.N
print(N)

# measured basis
ftrs_mat = layer.feature_mat

# umap_mat = umap.UMAP(n_neighbors=30, min_dist=0.1).fit_transform(ftrs_mat)

# types

# regions

In [None]:
# Local environment of every cell: the fraction of its k_spatial-1 nearest
# spatial neighbours that carry each cell-type label.
# NOTE(review): this assumes the rows of df are in the same order as the rows
# of XY -- confirm.
# labels = df['type_r0.1'].values
labels = df['ktype_L3'].values

# ctg: sorted unique labels; ctg_idx: position of each cell's label within ctg
ctg, ctg_idx = np.unique(labels, return_inverse=True)
print(ctg)

k_spatial = 30
NN = NearestNeighbors(n_neighbors=k_spatial)
NN.fit(XY)
knn = NN.kneighbors(XY, return_distance=False)

# Remove each cell from its own neighbour list. The cell is normally column 0,
# but cells with identical coordinates tie at distance 0 and may appear in any
# order, so locate the cell explicitly instead of assuming knn[:, 0] is "self".
# If a cell is missing from its own row (more than k_spatial coincident
# points), drop the farthest neighbour instead so every row keeps
# k_spatial-1 entries.
self_idx = np.arange(knn.shape[0])
is_self = knn == self_idx[:, None]
is_self[~is_self.any(axis=1), -1] = True
nbrs = knn[~is_self].reshape(-1, k_spatial - 1)

i = np.repeat(self_idx, k_spatial - 1)  # cells (row index)
j = ctg_idx[nbrs].reshape(-1,)          # type of each neighbour (column index)
dat = np.repeat(1, len(i))

# Duplicate (i, j) entries are summed on conversion, giving per-cell counts of
# neighbours per type.
env_counts = sparse.coo_matrix((dat, (i, j)), shape=(N, len(ctg))).toarray()  # dense

# Normalise rows to fractions. Rows with no neighbours stay all-zero without
# going through a 0/0 division (the original relied on
# np.nan_to_num(env_mat, 0), where the 0 was actually bound to `copy`).
row_totals = env_counts.sum(axis=1, keepdims=True)
env_mat = np.divide(
    env_counts, row_totals,
    out=np.zeros(env_counts.shape, dtype=float),
    where=row_totals > 0,
)
env_mat

In [None]:
%%time
cols = ['ktype_L1', 'ktype_L2', 'ktype_L3']
k_kms = [2,5,10,20,50,100]

for col in cols: 
    labels = df[col].values
    env_mat = tmgu.get_local_type_abundance(labels, XY=XY, k_spatial=30)
    for k_km in tqdm.tqdm(k_kms):
        kmeans = KMeans(n_clusters=k_km)
        reg_clsts = kmeans.fit_predict(env_mat)
        df[f'regtype_{col}_k{k_km}'] = np.char.add('t', np.array(reg_clsts).astype(str))

In [None]:
# Columns to colour by: the cell-type columns first, then every
# 'regtype_<col>_k<k>' column, grouped by cell-type column.
hues = [*cols, *(f'regtype_{col}_k{k_km}' for col in cols for k_km in k_kms)]
print(hues)

# One figure per hue with two panels: spatial coordinates (left) and UMAP (right).
panel_coords = (('x', 'y'), ('umap_x', 'umap_y'))

for hue in hues:
    hue_order = np.unique(df[hue])  # np.unique already returns the values sorted
    ntypes = len(hue_order)

    fig, axs = plt.subplots(1, 2, figsize=(8*2,6))
    fig.suptitle(f"{hue}; n={ntypes}")
    for ax, (xcol, ycol) in zip(axs, panel_coords):
        sns.scatterplot(data=df, x=xcol, y=ycol,
                        hue=hue, hue_order=hue_order,
                        s=0.5, edgecolor=None,
                        legend=False,
                        rasterized=True,
                        ax=ax)
        # ax.legend(loc='upper left', bbox_to_anchor=(0, -0.1), ncol=5)
        ax.set_aspect('equal')
        ax.axis('off')
    fig.subplots_adjust(wspace=0)
    plt.show()

In [None]:
# Arrange the hue names as a grid with one column per cell-type column:
# row 0 holds the cell-type columns themselves, each following row holds the
# region-type columns for one k (in the order of k_kms).
# The width is taken from `cols` rather than a hard-coded 3, so the layout
# follows any change to the list of cell-type columns.
n_type_cols = len(cols)
type_row = hues[:n_type_cols]
# hues[n_type_cols:] is grouped by cell-type column; reshape + transpose turns
# each group into a column of the grid.
regtype_rows = np.array(hues[n_type_cols:]).reshape(n_type_cols, -1).T
newhues = np.vstack([type_row, regtype_rows])
newhues


In [None]:

# Overview figure: one (spatial, UMAP) pair of panels per entry of newhues,
# laid out in the same grid as newhues. The grid size comes from
# newhues.shape instead of hard-coded 7 x 3, so it stays in step with
# cols / k_kms.
n_rows, n_type_cols = newhues.shape
panel_coords = (('x', 'y'), ('umap_x', 'umap_y'))  # left panel, right panel
n_panels = len(panel_coords)

fig, axs = plt.subplots(n_rows, n_panels*n_type_cols,
                        figsize=(8*n_panels*n_type_cols, 6*n_rows),
                        squeeze=False)  # always a 2-D array, so flatten() is safe
axs = axs.flatten()
for idx, hue in enumerate(newhues.flatten()):
    hue_order = np.unique(df[hue])  # sorted unique values
    ntypes = len(hue_order)

    # Row-major flattening of both axs and newhues keeps the panel pair of
    # entry idx at positions n_panels*idx .. n_panels*idx + n_panels - 1.
    pair_axes = axs[n_panels*idx : n_panels*(idx + 1)]
    pair_axes[0].set_title(f"{hue}; n={ntypes}")
    for ax, (xcol, ycol) in zip(pair_axes, panel_coords):
        sns.scatterplot(data=df, x=xcol, y=ycol,
                        hue=hue, hue_order=hue_order,
                        s=0.5, edgecolor=None,
                        legend=False,
                        rasterized=True,
                        ax=ax)
        # ax.legend(loc='upper left', bbox_to_anchor=(0, -0.1), ncol=5)
        ax.set_aspect('equal')
        ax.axis('off')

fig.subplots_adjust(wspace=0)
# NOTE(review): absolute, user-specific output path; `respath` is defined near
# the top of the notebook but not used here -- confirm which one is intended.
powerplots.savefig(fig, '/home/fangming/figures/ctype_rtype_v1.pdf')
plt.show()