In [1]:
import sys
sys.path.append('/home/xinyiz/pamrats')

import time
import os

import scanpy
import numpy as np
import scipy.sparse as sp

import torch
from torch import optim

# from sklearn.metrics import roc_auc_score
# from sklearn.metrics import average_precision_score

import gae.gae.optimizer as optimizer
import gae.gae.model
import gae.gae.preprocessing as preprocessing

import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import umap
import pandas as pd
from sklearn.preprocessing import scale
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import DBSCAN,MiniBatchKMeans,AgglomerativeClustering
from sklearn import metrics
from sklearn.linear_model import LogisticRegression


In [7]:
# ---------------------------------------------------------------------------
# Run configuration. Every name below is a notebook-level global that later
# cells read directly. Several settings (e.g. ifplot, ifcluster, logitL1,
# geneThresh, pca, minCells, protein, standardizeX) are not read by any cell
# visible in this notebook excerpt.
# ---------------------------------------------------------------------------
# NOTE(review): presumably this has to be set before CUDA is first initialised
# for it to take effect -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "1" 
ifplot=True
ifcluster=True
# Read by plotCTcomp: True divides each cell-type column by that cell type's
# total count, False divides each cluster row by the cluster size.
byCT=True

logitL1=0.5 #smaller is sparser
geneThresh=2.5 #times std above mean

# When truthy, '_beforeAct' is appended to the sample directory names below.
# inverseAct='leakyRelu'
inverseAct=None
plottype='umap'
pca=PCA()
minCells=15 #min number of cells for analysis
minCell_clusterDE=5
# Selects which comp* functions are called in the plotting cells; 'dbscan' is
# not listed, so compDBscan is skipped with this setting.
# clustermethod=['leiden']
clustermethod=['leiden','agglomerative','kmeanbatch']
#umap/leiden clustering parameters
# n_neighbors, min_dist, n_pcs and each resolution value are embedded in the
# Leiden label file names that compLeiden looks up.
n_neighbors=10
min_dist=0.25
n_pcs=40 #for clustering
# resolution=[0.5,0.8,1,1.5]
resolution=[0.05,0.1,0.2,0.3,0.5,0.8,1,1.5]
# Appended as 'epoch<plotepoch>' to every cluster-label file name that is read.
plotepoch=53
savenameAdd=''
#DBscan
epslist= [6,8,10]
min_sampleslist=[15,30,45] 
#agglomerative
# nclusterlist is also passed to compMinibatchKmean.
nclusterlist=[2,3,4,5,8,10,15]
aggMetric=['euclidean']


# Merged cell-type groups; the keys are appended to the list of cell types
# iterated in the per-region cells.
combineCelltype={'glia':['Astro','Micro', 'OPC', 'Oligo'],'CA':['CA1', 'CA2', 'CA3']}

use_cuda=True
fastmode=False #Validate during training pass
seed=3

protein=None #'scaled_binary'
# proteinWeights=0.05
# randFeatureSubset=None
# Short sample key -> value matched against scaleddata.obs['sample'].
plot_samples={'disease13':'AD_mouse9494','control13':'AD_mouse9498','disease8':'AD_mouse9723','control8':'AD_mouse9735'}
# The first entry decides which .h5ad file the load-data cell reads; every
# entry is used to build the per-sample directory names.
plot_sample_X=['logminmax']
plotRecon='' #'meanRecon'
# plot_sample_X=['corrected','scaled']
standardizeX=False
name='all_thresh25_01'
logsavepath='/mnt/external_ssd/xinyi/log/train_jointGAEcnn_starmap/'+name
modelsavepath='/mnt/external_ssd/xinyi/models/train_jointGAEcnn_starmap/'+name
# Root directory under which saved cluster labels are looked up and the
# composition plots are written.
plotsavepath='/mnt/external_ssd/xinyi/plots/train_jointGAEcnn_starmap/'+name
# Directory containing the .h5ad files read by the load-data cell.
datadir='/home/xinyiz/2021-01-13-mAD-test-dataset'

In [3]:
# Seed the NumPy and PyTorch generators; drop back to CPU if CUDA is unavailable
np.random.seed(seed)
torch.manual_seed(seed)
if use_cuda:
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.enabled = True
    else:
        print('cuda not available')
        use_cuda=False


In [4]:
#Load data
savedir=os.path.join('/mnt/xinyi/','starmap')
adj_dir=os.path.join(savedir,'a')

# Maps '<sample key>X_<representation>' to a torch tensor holding that sample's rows.
featureslist={}
xrep=plot_sample_X[0]  # only the first requested representation decides what is loaded
if xrep in ('corrected','scaled'):
    scaleddata=scanpy.read_h5ad(datadir+'/2020-12-27-starmap-mAD-scaled.h5ad')

    for s,sampleid in plot_samples.items():
        insample=scaleddata.obs['sample']==sampleid
        # both layers are stored regardless of which of the two was requested
        for layername in ('corrected','scaled'):
            featureslist[s+'X_'+layername]=torch.tensor(scaleddata.layers[layername][insample])

else:
    scaleddata=scanpy.read_h5ad(datadir+'/2020-12-27-starmap-mAD-raw.h5ad')

    for s,sampleid in plot_samples.items():
        scaleddata_train=scaleddata.X[scaleddata.obs['sample']==sampleid]

        if xrep!='logminmax':
            continue
        featurelog_train=np.log2(scaleddata_train+1/2)
        scaler=MinMaxScaler()
        # MinMaxScaler rescales column-wise, so the double transpose makes it
        # rescale each row (presumably one cell per row -- confirm) instead.
        featurelog_train_minmax=scaler.fit_transform(featurelog_train.T).T
        featureslist[s+'X_'+xrep]=torch.tensor(featurelog_train_minmax)



In [5]:
def plotCTcomp(labels,ctlist,savepath,savenamecluster,addname=''):
    """Save a heatmap of how the categories in ``ctlist`` are spread over clusters.

    A cluster-by-category count table is built from the two parallel vectors,
    normalised according to the notebook-level flag ``byCT`` and written to
    ``<savepath>/<savenamecluster>_ctComposition<addname>[_normbyCT].jpg``.

    Parameters
    ----------
    labels : array-like
        Cluster label of every cell.
    ctlist : array-like
        Category of every cell (the callers pass cell types or sample names),
        in the same order as ``labels``.
    savepath : str
        Directory the image is written to.
    savenamecluster : str
        File-name stem identifying the clustering run.
    addname : str, optional
        Extra suffix placed after ``_ctComposition``.

    Raises
    ------
    ValueError
        If ``labels`` and ``ctlist`` do not have the same number of entries.
    """
    labels=np.asarray(labels).ravel()
    ctlist=np.asarray(ctlist).ravel()
    if labels.size!=ctlist.size:
        raise ValueError('labels has %d entries but ctlist has %d'%(labels.size,ctlist.size))

    # Unique values are computed once; the inverse indices give, for every
    # cell, its row (cluster) and column (category) in the count table.
    labelvals,labelidx=np.unique(labels,return_inverse=True)
    ctvals,ctidx=np.unique(ctlist,return_inverse=True)
    res=np.zeros((labelvals.size,ctvals.size))
    np.add.at(res,(labelidx.reshape(-1),ctidx.reshape(-1)),1)

    if byCT:
        # column sums are the per-category totals
        addname+='_normbyCT'
        res/=res.sum(axis=0,keepdims=True)
    else:
        # row sums are the cluster sizes
        res/=res.sum(axis=1,keepdims=True)

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(res,cmap='binary')
    ax.set_yticks(np.arange(labelvals.size))
    ax.set_yticklabels(labelvals)
    ax.set_xticks(np.arange(ctvals.size))
    ax.set_xticklabels(ctvals)
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",rotation_mode="anchor")
    fig.tight_layout()
    fig.savefig(os.path.join(savepath,savenamecluster+'_ctComposition'+addname+'.jpg'))
    # close this figure explicitly so repeated calls do not accumulate open figures
    plt.close(fig)
        
        

def _plotSavedLabels(ctlist,savenames,addName=''):
    """Load each saved label vector named in ``savenames`` and plot its composition.

    Files are looked up in the notebook-level ``clustersavedir``. A missing
    file is reported with a 'DNE: ' line and skipped; a label vector with a
    single distinct value is skipped silently. Everything else is handed to
    ``plotCTcomp``, which writes its image back into ``clustersavedir``.
    """
    for savenamecluster in savenames:
        readpath=os.path.join(clustersavedir,savenamecluster)
        if not os.path.exists(readpath):
            print('DNE: '+readpath)
            continue
        # NOTE: pickle.load can execute arbitrary code from the file; only
        # read label files that were written by this project.
        with open(readpath, 'rb') as labelfile:
            labels=np.array(pickle.load(labelfile))
        if np.unique(labels).shape[0]==1:
            continue
        plotCTcomp(labels,ctlist,clustersavedir,savenamecluster,addName)


def compLeiden(ctlist,n_neighbors,n_pcs,min_dist,resolution,addName=''):
    """Plot the composition of saved Leiden clusterings, one per value in ``resolution``.

    ``ctlist`` is the per-cell category vector; ``addName`` is forwarded to
    ``plotCTcomp`` as its file-name suffix. The remaining arguments and the
    notebook-level ``plotepoch`` only determine the label file names.
    """
    _plotSavedLabels(
        ctlist,
        ('leiden_nn'+str(n_neighbors)+'mdist0'+str(int(min_dist*100))+'n_pcs'+str(n_pcs)+'res'+str(r)+'epoch'+str(plotepoch)
         for r in resolution),
        addName)

def compDBscan(ctlist,epsL,min_samplesL,n_pcs,addName=''):
    """Plot the composition of saved DBSCAN clusterings for every (eps, min_samples) pair.

    ``ctlist`` is the per-cell category vector; ``addName`` is forwarded to
    ``plotCTcomp``. The other arguments and ``plotepoch`` form the file names.
    """
    _plotSavedLabels(
        ctlist,
        ('dbscan_eps'+str(eps)+'msamples'+str(min_samples)+'n_pcs'+str(n_pcs)+'epoch'+str(plotepoch)
         for eps in epsL for min_samples in min_samplesL),
        addName)

def compAgg(ctlist,nclusterL,aggmetricL,n_pcs,addName=''):
    """Plot the composition of saved agglomerative clusterings for every (ncluster, metric) pair.

    ``ctlist`` is the per-cell category vector; ``addName`` is forwarded to
    ``plotCTcomp``. The other arguments and ``plotepoch`` form the file names.
    """
    _plotSavedLabels(
        ctlist,
        ('agg_ncluster'+str(ncluster)+aggmetric+'n_pcs'+str(n_pcs)+'epoch'+str(plotepoch)
         for ncluster in nclusterL for aggmetric in aggmetricL),
        addName)

def compMinibatchKmean(ctlist,nclusterL,n_pcs,addName=''):
    """Plot the composition of saved mini-batch k-means clusterings, one per cluster count.

    ``ctlist`` is the per-cell category vector; ``addName`` is forwarded to
    ``plotCTcomp``. The other arguments and ``plotepoch`` form the file names.
    """
    _plotSavedLabels(
        ctlist,
        ('minibatchkmean_ncluster'+str(ncluster)+'n_pcs'+str(n_pcs)+'epoch'+str(plotepoch)
         for ncluster in nclusterL),
        addName)

In [70]:
#all cells
for s,sampleidx in plot_samples.items():
    # per-cell annotations of this sample
    insample=scaleddata.obs['sample']==sampleidx
    celltype_broad=scaleddata.obs.loc[insample,'top_level']
    celltype_sub=scaleddata.obs.loc[insample,'cell_type_label']
    region=scaleddata.obs.loc[insample,'region']
#     sobj_coord_np=scaleddata.obs.loc[scaleddata.obs['sample']==sampleidx,['x','y']].to_numpy()
    for xcorr in plot_sample_X:
        samplename=s+'X_'+xcorr+('_beforeAct' if inverseAct else '')

        sampledir=os.path.join(plotsavepath,samplename)
        # the comp* functions read this notebook-level name to locate the label files
        clustersavedir=os.path.join(sampledir,'cluster')

        # clustering method -> (plotting function, its method-specific arguments)
        dispatch=(
            ('leiden',compLeiden,(n_neighbors,n_pcs,min_dist,resolution)),
            ('dbscan',compDBscan,(epslist,min_sampleslist,n_pcs)),
            ('agglomerative',compAgg,(nclusterlist,aggMetric,n_pcs)),
            ('kmeanbatch',compMinibatchKmean,(nclusterlist,n_pcs)),
        )
        for method,compfn,compargs in dispatch:
            if method in clustermethod:
                compfn(celltype_broad,*compargs)

In [71]:
# separate plots by region and cell types
for s in plot_samples.keys():
    print(s)
    sampleidx=plot_samples[s]
    # per-cell annotations of this sample (the mask is computed once and reused)
    insample=scaleddata.obs['sample']==sampleidx
    celltype_broad=scaleddata.obs.loc[insample,'top_level']
    celltype_sub=scaleddata.obs.loc[insample,'cell_type_label']
    region=scaleddata.obs.loc[insample,'region']
#     sobj_coord_np=scaleddata.obs.loc[scaleddata.obs['sample']==sampleidx,['x','y']].to_numpy()

    # cell types present in the data, followed by the merged groups from combineCelltype
    origCT=np.unique(celltype_broad)
    celltypeplot=np.concatenate((origCT,list(combineCelltype.keys())),axis=None)
    for xcorr in plot_sample_X:
        samplename=s+'X_'+xcorr
        featureDE=np.copy(featureslist[samplename])

        if inverseAct:
            samplename+='_beforeAct'
        sampledir=os.path.join(plotsavepath,samplename)
        # makedirs also creates missing parent directories, which os.mkdir would fail on
        os.makedirs(sampledir,exist_ok=True)

        for r in np.unique(region):
            print(r)
            ridx=region==r
            for reg in celltypeplot:
                # only these cell types are analysed
                if reg not in ['Micro','CA1','Ex']:
                    continue
                print(reg)
                # the comp* functions read this notebook-level name to locate the label files
                clustersavedir=os.path.join(plotsavepath,samplename,'cluster'+'_'+reg,r)

                if reg in origCT:
                    ct_idx=celltype_broad==reg
                else:
                    # merged group: union of its member cell types
                    ct_idx=False
                    for i in combineCelltype[reg]:
                        ct_idx=np.logical_or(ct_idx,celltype_broad==i)

                reg_idx=np.logical_and(ridx,ct_idx)

                # a single sub-type gives a trivial composition plot, so skip it
                if np.unique(celltype_sub[reg_idx]).size==1:
                    continue

                if 'leiden' in clustermethod:
                    compLeiden(celltype_sub[reg_idx],n_neighbors,n_pcs,min_dist,resolution)
                if 'dbscan' in clustermethod:
                    compDBscan(celltype_sub[reg_idx],epslist,min_sampleslist,n_pcs)
                if 'agglomerative' in clustermethod:
                    compAgg(celltype_sub[reg_idx],nclusterlist,aggMetric,n_pcs)
                if 'kmeanbatch' in clustermethod:
                    compMinibatchKmean(celltype_sub[reg_idx],nclusterlist,n_pcs)


disease13
Cortex
CA1
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease13X_logminmax_beforeAct/cluster_CA1/Cortex/leiden_nn10mdist025n_pcs40res0.05epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease13X_logminmax_beforeAct/cluster_CA1/Cortex/leiden_nn10mdist025n_pcs40res0.1epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease13X_logminmax_beforeAct/cluster_CA1/Cortex/leiden_nn10mdist025n_pcs40res0.2epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease13X_logminmax_beforeAct/cluster_CA1/Cortex/leiden_nn10mdist025n_pcs40res0.3epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease13X_logminmax_beforeAct/cluster_CA1/Cortex/leiden_nn10mdist025n_pcs40res0.5epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease13X_logminmax_beforeAct/cluster_CA1/Cortex/leiden_nn10mdist025n_pcs

White Matter
CA1
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease13X_logminmax_beforeAct/cluster_CA1/White Matter/leiden_nn10mdist025n_pcs40res0.05epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease13X_logminmax_beforeAct/cluster_CA1/White Matter/leiden_nn10mdist025n_pcs40res0.1epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease13X_logminmax_beforeAct/cluster_CA1/White Matter/leiden_nn10mdist025n_pcs40res0.2epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease13X_logminmax_beforeAct/cluster_CA1/White Matter/leiden_nn10mdist025n_pcs40res0.3epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease13X_logminmax_beforeAct/cluster_CA1/White Matter/leiden_nn10mdist025n_pcs40res0.5epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease13X_logminmax_beforeAct/cluster_CA1/White

Micro
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/control13X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.05epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/control13X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.1epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/control13X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.2epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/control13X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.3epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/control13X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.5epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/control13X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40r

Micro
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease8X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.05epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease8X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.1epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease8X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.2epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease8X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.3epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease8X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.5epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/disease8X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.8e

Micro
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/control8X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.05epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/control8X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.1epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/control8X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.2epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/control8X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.3epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/control8X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.5epoch9990
DNE: /mnt/external_ssd/xinyi/plots/train_gae_starmap/allk20XA_02_dca_over/control8X_logminmax_beforeAct/cluster_Micro/Cortex/leiden_nn10mdist025n_pcs40res0.8e

In [8]:
# combine all latents to one plot 
np.random.seed(seed)
for xcorr in plot_sample_X:
    # running per-cell annotations, extended one sample at a time
    celltype_broad=celltype_sub=region=samplenameList=sobj_coord_np=None

    for s,sampleidx in plot_samples.items():
        samplename=s+'X_'+xcorr

        insample=scaleddata.obs['sample']==sampleidx
        broad_s=scaleddata.obs.loc[insample,'top_level']
        sub_s=scaleddata.obs.loc[insample,'cell_type_label']
        region_s=scaleddata.obs.loc[insample,'region']
        coord_s=scaleddata.obs.loc[insample,['x','y']].to_numpy()

        if celltype_broad is not None:
            # later samples are appended to what has been collected so far
            celltype_broad=np.concatenate((celltype_broad,broad_s),axis=None)
            celltype_sub=np.concatenate((celltype_sub,sub_s),axis=None)
            region=np.concatenate((region,region_s),axis=None)
            sobj_coord_np=np.concatenate((sobj_coord_np,coord_s),axis=0)
        else:
            # the first sample seeds the accumulators
            celltype_broad,celltype_sub,region,sobj_coord_np=broad_s,sub_s,region_s,coord_s

    origCT=np.unique(celltype_broad)
    celltypeplot=np.concatenate((origCT,list(combineCelltype.keys())),axis=None)
    sampledir=os.path.join(plotsavepath,'combined')+('_beforeAct' if inverseAct else '')
    # the comp* functions read this notebook-level name to locate the label files
    clustersavedir=os.path.join(sampledir,'cluster')

    # clustering method -> (plotting function, its method-specific arguments)
    dispatch=(
        ('leiden',compLeiden,(n_neighbors,n_pcs,min_dist,resolution)),
        ('dbscan',compDBscan,(epslist,min_sampleslist,n_pcs)),
        ('agglomerative',compAgg,(nclusterlist,aggMetric,n_pcs)),
        ('kmeanbatch',compMinibatchKmean,(nclusterlist,n_pcs)),
    )
    for method,compfn,compargs in dispatch:
        if method in clustermethod:
            compfn(celltype_broad,*compargs)
#     #by region
#     for reg in np.unique(region):
#         savedir=os.path.join(sampledir,'embedding_'+plottype+'_'+reg)
#         clustersavedir=os.path.join(sampledir,'cluster'+'_'+reg)

#         reg_idx=region==reg

#         #by region and celltype
#         for ct in celltypeplot:
# #             if not ((reg=='Cortex' and ct in ['Ex']) or (reg=='Hippocampus' and ct in ['CA1','DG','Micro','CA'])):
#             if not (ct in ['Micro']):
#                 continue
#             print(reg+ct)
#             clustersavedir=os.path.join(sampledir,'cluster'+'_'+reg+ct)
            
#             if ct in origCT:
#                 ct_idx=celltype_broad==ct
#             else:
#                 ct_idx=False
#                 for i in combineCelltype[ct]:
#                     ct_idx=np.logical_or(ct_idx,celltype_broad==i)
#             ct_idx=np.logical_and(reg_idx,ct_idx)      
            
#             if np.sum(ct_idx)<3:
#                 continue
            
#             if 'leiden' in clustermethod:
#                 compLeiden(celltype_sub[ct_idx],n_neighbors,n_pcs,min_dist,resolution)
#             if 'dbscan' in clustermethod:
#                 compDBscan(celltype_sub[ct_idx],epslist,min_sampleslist,n_pcs)
#             if 'agglomerative' in clustermethod:
#                 compAgg(celltype_sub[ct_idx],nclusterlist,aggMetric,n_pcs)
#             if 'kmeanbatch' in clustermethod:
#                 compMinibatchKmean(celltype_sub[ct_idx],nclusterlist,n_pcs)


DNE: /mnt/external_ssd/xinyi/plots/train_jointGAEcnn_starmap/all_thresh25_01/combined/cluster/minibatchkmean_ncluster2n_pcs40epoch53
DNE: /mnt/external_ssd/xinyi/plots/train_jointGAEcnn_starmap/all_thresh25_01/combined/cluster/minibatchkmean_ncluster3n_pcs40epoch53
DNE: /mnt/external_ssd/xinyi/plots/train_jointGAEcnn_starmap/all_thresh25_01/combined/cluster/minibatchkmean_ncluster4n_pcs40epoch53
DNE: /mnt/external_ssd/xinyi/plots/train_jointGAEcnn_starmap/all_thresh25_01/combined/cluster/minibatchkmean_ncluster5n_pcs40epoch53
DNE: /mnt/external_ssd/xinyi/plots/train_jointGAEcnn_starmap/all_thresh25_01/combined/cluster/minibatchkmean_ncluster8n_pcs40epoch53
DNE: /mnt/external_ssd/xinyi/plots/train_jointGAEcnn_starmap/all_thresh25_01/combined/cluster/minibatchkmean_ncluster10n_pcs40epoch53
DNE: /mnt/external_ssd/xinyi/plots/train_jointGAEcnn_starmap/all_thresh25_01/combined/cluster/minibatchkmean_ncluster15n_pcs40epoch53


In [73]:
# combine all latents to one plot -- by samples
np.random.seed(seed)
for xcorr in plot_sample_X:
    # running per-cell vectors, extended one sample at a time
    samplenameList=None
    celltype_broad=None
    region=None

    for s in plot_samples.keys():
        sampleidx=plot_samples[s]
        samplename=s+'X_'+xcorr

        insample=scaleddata.obs['sample']==sampleidx
        names_s=np.repeat(s,np.sum(insample))  # the sample key, once per cell
        broad_s=scaleddata.obs.loc[insample,'top_level']
        region_s=scaleddata.obs.loc[insample,'region']

        if samplenameList is None:
            samplenameList=names_s
            celltype_broad=broad_s
            region=region_s
        else:
            celltype_broad=np.concatenate((celltype_broad,broad_s),axis=None)
            samplenameList=np.concatenate((samplenameList,names_s),axis=None)
            region=np.concatenate((region,region_s),axis=None)

    # cell types present in the data, followed by the merged groups from combineCelltype
    origCT=np.unique(celltype_broad)
    celltypeplot=np.concatenate((origCT,list(combineCelltype.keys())),axis=None)
    sampledir=os.path.join(plotsavepath,'combined'+xcorr)
    if inverseAct:
        sampledir+='_beforeAct'
    # the comp* functions read this notebook-level name to locate the label files
    clustersavedir=os.path.join(sampledir,'cluster')

    if 'leiden' in clustermethod:
        compLeiden(samplenameList,n_neighbors,n_pcs,min_dist,resolution,'Sample')
    if 'dbscan' in clustermethod:
        compDBscan(samplenameList,epslist,min_sampleslist,n_pcs,'Sample')
    if 'agglomerative' in clustermethod:
        compAgg(samplenameList,nclusterlist,aggMetric,n_pcs,'Sample')
    if 'kmeanbatch' in clustermethod:
        compMinibatchKmean(samplenameList,nclusterlist,n_pcs,'Sample')
    #by region
    for reg in np.unique(region):
        # NOTE: this rebinds the notebook-level `savedir` set in the load-data cell
        savedir=os.path.join(sampledir,'embedding_'+plottype+'_'+reg)
        clustersavedir=os.path.join(sampledir,'cluster'+'_'+reg)

        reg_idx=region==reg

        #by region and celltype
        for ct in celltypeplot:
#             if not ((reg=='Cortex' and ct in ['Ex']) or (reg=='Hippocampus' and ct in ['CA1','DG','Micro','CA'])):
            if ct not in ['Micro']:
                continue
            print(reg+ct)
            clustersavedir=os.path.join(sampledir,'cluster'+'_'+reg+ct)

            if ct in origCT:
                ct_idx=celltype_broad==ct
            else:
                # merged group: union of its member cell types
                ct_idx=False
                for i in combineCelltype[ct]:
                    ct_idx=np.logical_or(ct_idx,celltype_broad==i)
            ct_idx=np.logical_and(reg_idx,ct_idx)

            if np.sum(ct_idx)<3:
                continue

            # Composition is taken over the sample names collected in this cell,
            # so the '...Sample' images show sample membership and the cell does
            # not depend on a cell-type vector left behind by another cell.
            samples_sel=samplenameList[ct_idx]
            if 'leiden' in clustermethod:
                compLeiden(samples_sel,n_neighbors,n_pcs,min_dist,resolution,'Sample')
            if 'dbscan' in clustermethod:
                compDBscan(samples_sel,epslist,min_sampleslist,n_pcs,'Sample')
            if 'agglomerative' in clustermethod:
                compAgg(samples_sel,nclusterlist,aggMetric,n_pcs,'Sample')
            if 'kmeanbatch' in clustermethod:
                compMinibatchKmean(samples_sel,nclusterlist,n_pcs,'Sample')


CortexMicro
HippocampusMicro
White MatterMicro
