In [None]:
import tqdm
import os
import logging 

import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import igraph
import umap
import leidenalg as la

from dredFISH.Analysis import TissueGraph
from dredFISH.Analysis import Classification
# from dredFISH.Utils.__init__plots import * 
from dredFISH.Utils import powerplots
from dredFISH.Utils.miscu import leiden
from dredFISH.Visualization.viz_cell_layer import *
import datashader as ds
from dredFISH.Utils import tmgu


from sklearn.cluster import KMeans

# import importlib
# importlib.reload(powerplots)
# importlib.reload(Viz)
# importlib.reload(Classification)
# importlib.reload(TissueGraph)

In [None]:
# plt.plot(np.arange(4))
# plt.savefig('foo.pdf')

#### Load data

In [None]:
# Configure the root logger once for the whole notebook:
# INFO and above, each message prefixed with a short month-day time stamp.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(message)s',
    datefmt='%m-%d %H:%M:%S',
)

In [None]:
# build on top of basepth
basepth = '/bigstore/GeneralStorage/Data/dredFISH/Dataset1-t3'
output_df = os.path.join(basepth, "analysis_dev_v3.csv")
respth = os.path.join(basepth, 'figures')
if not os.path.isdir(respth):
    os.mkdir(respth)

!ls -alhtr $basepth
!head $basepth"/TMG.json"

In [None]:
# Setting: lines handed to preview_hemisphere() to split the section.
# Presumably each entry is one line given by two (x, y) points -- confirm
# against preview_hemisphere.
split_lines = [[(550, -6000), (200, 2000)]]

In [None]:
# Load the TissueMultiGraph (TMG) stored under basepth.
logging.info(f"Load TMG from {basepth}")
TMG = TissueGraph.TissueMultiGraph(
    basepath=basepth,
    redo=False,  # load existing
)

# Unpack what the rest of the notebook needs from layer 0
# (presumably the cell layer -- confirm against TissueGraph).
layer = TMG.Layers[0]
N = layer.N
G = layer.FG
cells = layer.adata.obs.index.values
ftrs_mat = layer.feature_mat  # measured basis

# Temporary hack: the two coordinate columns are swapped, so x is taken
# from column 1 of layer.XY and y from column 0; XY is rebuilt to match.
XY = layer.XY
y, x = XY[:, 0], XY[:, 1]
XY = np.vstack([x, y]).T

# Split the section into hemispheres along split_lines.
logging.info("split hemisphere...")
cond, isinpoly, XYnew = preview_hemisphere(split_lines, XY=XY, no_plot=True)

# 2-D UMAP embedding of the feature matrix; random_state is fixed.
logging.info("generate UMAP...")
umap_reducer = umap.UMAP(n_neighbors=30, min_dist=0.1, random_state=0)
umap_mat = umap_reducer.fit_transform(ftrs_mat)

# Known cell types: a 'knn' classifier trained against the
# 'allen_smrt_dpnmf' reference, one label per requested reference level.
logging.info("identify known cell types...")
allen_classifier = Classification.KnownCellTypeClassifier(
    layer,
    tax_name='Allen_types',
    ref='allen_smrt_dpnmf',
    # 'cluster_label' is deliberately left out of the levels
    ref_levels=['class_label', 'neighborhood_label', 'subclass_label'],
    model='knn',
)
allen_classifier.train(verbose=True)
type_mat = allen_classifier.classify()

# Unsupervised cell types: run leiden() on graph G once per resolution.
# clst_mat[j] holds the result for resolutions[j].
logging.info("cell clustering (unsupervised types)...")
resolutions = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10]
clst_mat = []
# Iterate over the list itself rather than enumerate(...): enumerate has
# no length, so tqdm could not show a total, and the index was unused.
for r in tqdm.tqdm(resolutions):
    clst_mat.append(leiden(G, cells, resolution=r))

# Region types: cluster each cell's local type abundance with k-means.
k_kms = [5, 10, 20, 50]
SG = layer.SG

# Second column of type_mat (known cell types Level 2) is the type basis.
typebasis = type_mat[:, 1]
env_mat = tmgu.get_local_type_abundance(typebasis, SG=SG)

# One clustering per k; reg_mat[j] holds the labels for k_kms[j].
reg_mat = []
for k_km in tqdm.tqdm(k_kms):
    kmeans = KMeans(n_clusters=k_km, random_state=1)
    reg_mat.append(kmeans.fit_predict(env_mat))

In [None]:
logging.info("organizing results...")
# Gather every per-cell result into one table, one column group at a time.
df = pd.DataFrame()

# basics: coordinates as used above, coordinates returned by the
# hemisphere split, and the hemisphere flag as 0/1
basic_cols = {
    'x': x,
    'y': y,
    'x2': XYnew[:, 0],
    'y2': XYnew[:, 1],
    'hemi': cond.astype(int),
}
for name, values in basic_cols.items():
    df[name] = values

# basis: the first 24 columns of the feature matrix -> b0 .. b23
for i in range(24):
    df[f'b{i}'] = ftrs_mat[:, i]

# umap
df['umap_x'], df['umap_y'] = umap_mat[:, 0], umap_mat[:, 1]

# ktype: known cell types, columns numbered from 1 (ktype_L1 .. ktype_L3)
for level in range(3):
    df[f'ktype_L{level + 1}'] = type_mat[:, level]

# type: unsupervised clusters, one column per resolution, labels 't<id>'
for r, types in zip(resolutions, clst_mat):
    df[f'type_r{r}'] = np.char.add('t', np.array(types).astype(str))

# region: k-means region types, one column per k, labels 'reg<id>'
# NOTE(review): the column name says 'allenL1basis' while the basis was
# taken from type_mat[:, 1] (commented as Level 2 where it is computed)
# -- confirm which name is intended before renaming anything.
for k_km, reg_clsts in zip(k_kms, reg_mat):
    df[f'regtype_allenL1basis_k{k_km}'] = np.char.add('reg', np.array(reg_clsts).astype(str))

# save
df.to_csv(output_df, header=True, index=True)
logging.info(f"saved results to: {output_df}")
df

# Visualization

In [None]:
%%time
output = os.path.join(respth, 'fig1_basis_space.pdf')
output = None
powerplots.plot_basis_spatial(df, output=output)


In [None]:
%%time
dfsub = df[df['hemi']==1]
output = os.path.join(respth, 'fig1-2_basis_space_righthalf.pdf')
output = None
powerplots.plot_basis_spatial(dfsub, pmode='right_half', output=output)

In [None]:
%%time
output = os.path.join(respth, 'fig2_basis_umap.pdf')
output = None
powerplots.plot_basis_umap(df, output=output)

In [None]:
%%time
for i, r in enumerate(resolutions):
    hue = f'type_r{r}'
    output = os.path.join(respth, f'fig3-{i}_celltypes_r{r}.pdf')
    output = None
    powerplots.plot_type_spatial_umap(df, hue, output=output)

In [None]:
# Figure 4: known cell types -- one figure per 'ktype_L*' column,
# each written to respth as a PDF.
ktypecols = df.filter(regex='^ktype_L', axis='columns').columns
for i, col in enumerate(ktypecols):
    output = os.path.join(respth, f'fig4-{i}_{col}.pdf')
    powerplots.plot_type_spatial_umap(df, col, output=output)

In [None]:
# region
regtypecols = df.filter(regex='^regtype_allenL1basis_k', axis=1).columns
for i, col in enumerate(regtypecols):
    hue = col
    output = os.path.join(respth, f'fig5-{i}_{col}.pdf')
    powerplots.plot_type_spatial_umap(df, hue, output=output)