In [None]:
import matplotlib
import matplotlib.pyplot as plt 
import numpy as np
import pandas as pd
import os
import umap
import datashader as ds
import colorcet as cc
import igraph
import tqdm
from scipy import sparse
from scipy import stats
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import LatentDirichletAllocation
from statsmodels.stats.multitest import multipletests
from sklearn.cluster import KMeans
from scipy.spatial import Delaunay
import json
import itertools
import pynndescent
import time
from sklearn.svm import SVC

from matplotlib.collections import PolyCollection
from matplotlib.colors import ListedColormap

from dredFISH.Analysis import TissueGraph
from dredFISH.Visualization import Viz
from dredFISH.Utils.__init__plots import * 
from dredFISH.Utils import powerplots
from dredFISH.Utils import miscu
from dredFISH.Utils import tmgu

import importlib
# Reload the project modules imported above so the kernel re-executes their
# current source (useful while those modules are being edited alongside this notebook).
importlib.reload(Viz)
importlib.reload(TissueGraph)
importlib.reload(powerplots)

#### Load data

In [None]:
# Results path; presumably where figures get written.
# NOTE(review): not referenced by any of the visible cells — confirm it is still needed.
respath = '/bigstore/GeneralStorage/fangming/projects/dredfish/figures/'

In [None]:
# Root directory of the dataset; the cells below read default_analysis.csv and the
# TissueMultiGraph from here.
basepth = '/bigstore/GeneralStorage/Data/dredFISH/Dataset1-t5'
# Shell out to list the directory contents and peek at the first lines of TMG.json.
!ls -alhtr $basepth
!head $basepth"/TMG.json"

In [None]:
# Per-cell analysis table; the first CSV column becomes the index.
analysis_csv = os.path.join(basepth, "default_analysis.csv")
df = pd.read_csv(analysis_csv, index_col=0)
df

In [None]:
# Build the tissue multigraph object from the dataset directory.
# redo=False -> load existing
TMG = TissueGraph.TissueMultiGraph(basepath=basepth, redo=False)

In [None]:
# Pull the per-cell arrays used below from the first layer of the multigraph.
layer = TMG.Layers[0]

# spatial coordinates
XY = layer.XY
# temporary hack: the two coordinate columns are deliberately swapped,
# i.e. x is taken from column 1 and y from column 0
x = XY[:, 1]
y = XY[:, 0]

# index values of the layer's obs table
cells = layer.adata.obs.index.values

# size reported by the layer
N = layer.N

# measured basis
ftrs_mat = layer.feature_mat

# umap_mat = umap.UMAP(n_neighbors=30, min_dist=0.1).fit_transform(ftrs_mat)




# Lateral symmetry measure

In [None]:
def build_feature_graph_knnlite(ftrs_mat, k=15, metric='cosine', random_state=None):
    """Build an undirected k-nearest-neighbour graph in feature space.

    An approximate kNN index is built with ``pynndescent.NNDescent``; its
    neighbour table is turned into a sparse 0/1 adjacency matrix, which is
    then handed to ``tmgu.adjacency_to_igraph``.

    Parameters
    ----------
    ftrs_mat : array-like
        Feature matrix with one row per sample (``len(ftrs_mat)`` samples).
    k : int, default 15
        ``n_neighbors`` passed to NNDescent. The first column of the
        neighbour table is discarded (presumably the sample itself -- TODO
        confirm), so each sample contributes at most ``k - 1`` edges.
    metric : str, default 'cosine'
        Distance metric passed to NNDescent.
    random_state : optional, default None
        Forwarded to NNDescent so the approximate search can be seeded for
        reproducible graphs. ``None`` keeps the previous (unseeded) behaviour.

    Returns
    -------
    G
        The result of
        ``tmgu.adjacency_to_igraph(adj_mat, directed=False, simplify=True)``.
    """
    N = len(ftrs_mat)

    # kNN graph
    knn = pynndescent.NNDescent(ftrs_mat,
                                n_neighbors=k,
                                metric=metric,
                                diversify_prob=1,
                                pruning_degree_multiplier=1.5,
                                random_state=random_state,
                                )
    idx, _ = knn.neighbor_graph

    # to adj and to graph: drop the first column, flatten the rest into edges
    nbrs = np.asarray(idx)[:, 1:]
    rows = np.repeat(np.arange(N), nbrs.shape[1])
    cols = nbrs.reshape(-1,)

    # NNDescent marks neighbours it failed to find with -1; a negative index
    # would make coo_matrix raise, so those slots are skipped.
    valid = cols >= 0
    rows, cols = rows[valid], cols[valid]

    adj_mat = sparse.coo_matrix((np.repeat(1, len(rows)), (rows, cols)), shape=(N, N))
    G = tmgu.adjacency_to_igraph(adj_mat, directed=False, simplify=True)

    return G

In [None]:
# from meta
# Parse the analysis metadata JSON dump into `meta`.
f = '/bigstore/GeneralStorage/fangming/projects/dredfish/data_dump/analysis_meta_Mar31.json'
with open(f, mode='r') as fh:
    meta = json.loads(fh.read())

In [None]:
# Split the table into two halves by the `hemi` column (0 vs 1).
is_h1 = df['hemi'] == 0
is_h2 = df['hemi'] == 1

# independent copies, so columns added later do not touch `df`
df_h1 = df[is_h1].copy()
df_h2 = df[is_h2].copy()

# separate cells h1 and h2 (index values of each half)
cells_h1 = df.index[is_h1].values
cells_h2 = df.index[is_h2].values

In [None]:
%%time
ftrs_mat_h1 = ftrs_mat[cells_h1]
G_h1 = build_feature_graph_knnlite(ftrs_mat_h1, k=15, metric='cosine')

ftrs_mat_h2 = ftrs_mat[cells_h2]
G_h2 = build_feature_graph_knnlite(ftrs_mat_h2, k=15, metric='cosine')

In [None]:
# clustering half-and-half
# Run miscu.leiden on each half's feature graph at every resolution, store the
# labels as 't<label>' strings in df_h1 / df_h2, and stack the raw labels into
# one array per half (one row per resolution).
resolutions = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10]
clst_mat_h1 = []
clst_mat_h2 = []
# Wrap the list itself (not enumerate(...)) so tqdm can read its length and
# show a total / ETA; the enumerate index was unused.
for r in tqdm.tqdm(resolutions):
    types_h1 = miscu.leiden(G_h1, cells_h1, resolution=r)
    types_h2 = miscu.leiden(G_h2, cells_h2, resolution=r)

    # add to a df
    df_h1[f'type_r{r}'] = np.char.add('t', np.array(types_h1).astype(str))
    df_h2[f'type_r{r}'] = np.char.add('t', np.array(types_h2).astype(str))
    clst_mat_h1.append(types_h1)
    clst_mat_h2.append(types_h2)


clst_mat_h1 = np.array(clst_mat_h1)
clst_mat_h2 = np.array(clst_mat_h2)

In [None]:
# for i, r in enumerate(resolutions):
#     hue = f'type_r{r}'
#     output = None
#     powerplots.plot_type_spatial_umap(df_h1, hue, output=output)
#     powerplots.plot_type_spatial_umap(df_h2, hue, output=output)
    
#     # break

In [None]:
# matching the other half (across modality)


# N = len(ftrs_mat)

# # kNN graph
# knn = pynndescent.NNDescent(ftrs_mat_h1,
#                             n_neighbors=15,
#                             # metric='cosine',
#                             metric='euclidean',
#                             diversify_prob=1,
#                             pruning_degree_multiplier=1.5,
#                             )
# idx, _ = knn.neighbor_graph

# # to adj and to graph
# i = np.repeat(np.arange(N), k-1)
# j = idx[:,1:].reshape(-1,)
# adj_mat = sparse.coo_matrix((np.repeat(1, len(i)), (i,j)), shape=(N,N))
# G = tmgu.adjacency_to_igraph(adj_mat, directed=False, simplify=True)


In [None]:
def mapping_types(
    ftrs_source,  
    types_source,
    ftrs_target,
    ):
    """
    Transfer type labels from a labelled source set to a target set.

    An RBF-kernel SVM (C=1) is fitted on (ftrs_source, types_source) and used
    to predict a label for every row of ftrs_target. When the source carries
    only one distinct label, no model is fitted and that label is repeated
    once per target row.

    Uses SVM -- a bit slow
    can we get faster implementation using bi-partite graph?
    """
    distinct_types = np.unique(types_source)

    # degenerate case: a single class, nothing to learn
    if distinct_types.size == 1:
        return np.repeat(distinct_types, len(ftrs_target))

    classifier = SVC(C=1, kernel='rbf').fit(ftrs_source, types_source)
    return classifier.predict(ftrs_target)

In [None]:
%%time
for r in tqdm.tqdm(resolutions):
    # predict paired labels for h2 cells using h1 labels
    types_h1 = df_h1[f'type_r{r}'].values
    ptypes_h2 = mapping_types(ftrs_mat_h1, types_h1, ftrs_mat_h2)
    df_h2[f'ptype_r{r}'] = ptypes_h2
    
    # reverse case
    types_h2 = df_h2[f'type_r{r}'].values
    ptypes_h1 = mapping_types(ftrs_mat_h2, types_h2, ftrs_mat_h1)
    df_h1[f'ptype_r{r}'] = ptypes_h1
    

In [None]:
# For every resolution, plot h1 with its own labels and h2 with its predicted labels.
output = None
for i, r in enumerate(resolutions):
    panels = [
        (df_h1, f'type_r{r}'),
        (df_h2, f'ptype_r{r}'),
    ]
    for frame, hue in panels:
        powerplots.plot_type_spatial_umap(frame, hue, output=output)

    # break

In [None]:
# save the df temporarily so we can develop off of it

In [None]:
# generate conf mats and accuracy stats 

In [None]:
# iterate