In [1]:
import time
import os

import scanpy as sc
import numpy as np
import scipy.sparse as sp

import torch
from torch import optim
from torch.utils.data import DataLoader

import models.loadImg as loadImg
import models.modelsCNN as modelsCNN
import models.optimizer as optimizer

import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import gc
from skimage import io
import umap
from sklearn.cluster import MiniBatchKMeans,AgglomerativeClustering
from sklearn.decomposition import PCA

import anndata as ad

In [2]:
sc.settings.verbosity = 3



In [3]:
os.environ["CUDA_LAUNCH_BLOCKING"] = "0,1,2,3" 
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3" 
use_cuda=True


In [4]:
radius=48
datadir='/media/xinyi/dcis2idc/data'
sampleList=[]
for s in os.listdir('/media/xinyi/dcis2idc/data'):
    if 'hoechst'  in s and ('_1_' in s or '_3_' in s or '_4_' in s or '_6_' in s):
#     if 'hoechst'  in s and ('_3_' in s or '_4_' in s or '_6_' in s):
        sampleList.append(s)
print(sampleList)
trainingCores={'br1003a':[],'br8018a':[],'br301':[]}
for s in range(1,11):
# for s in range(5,11):
    trainingCores['br1003a'].append('A'+str(s))
    trainingCores['br1003a'].append('C'+str(s))
    trainingCores['br1003a'].append('I'+str(s))
for s in range(1,11):
    if s<9:
        trainingCores['br8018a'].append('H'+str(s))
    trainingCores['br8018a'].append('A'+str(s))
    trainingCores['br8018a'].append('B'+str(s))
    trainingCores['br8018a'].append('F'+str(s))
for s in range(1,7):
    for sr in ['A','B','C','D','E']:
        trainingCores['br301'].append(sr+str(s))

['br1003a_1_cytokeratin_555_aSMA_647_hoechst', 'br1003a_3_collagen1_647_hoechst', 'br1003a_4_cytokeratin_555_gh2ax_647_hoechst', 'br301_4_cytokeratin_555_aSMA_647_hoechst', 'br301_6_collagen1_647_hoechst', 'br8018a_1_cytokeratin_555_aSMA_647_hoechst', 'br8018a_3_collagen1_647_hoechst', 'br8018a_4_cytokeratin_555_gh2ax_647_hoechst']


In [5]:
with open(os.path.join(datadir,'processed','latent311'), 'rb') as input:
    latent=pickle.load(input)

In [6]:
name='exp0'
logsavepath='/media/xinyi/dcis2idc/log/cnnvae'+name
modelsavepath='/media/xinyi/dcis2idc/models/cnnvae'+name
plotsavepath='/media/xinyi/dcis2idc/plots/cnnvae'+name
sampledir=plotsavepath
clustersavedir_alltrain=os.path.join(sampledir,'cluster_alltrain_reordered')
if not os.path.exists(clustersavedir_alltrain):
    os.mkdir(clustersavedir_alltrain)

In [7]:
ep=311

In [8]:
with open(os.path.join(datadir,'processed','train_cnnvae_names'), 'rb') as input:
    allImgNames=pickle.load(input)
#plot by disease progression
br1003aSpecs=pd.read_excel('/media/xinyi/dcis2idc/data/BR1003a specs.xlsx',header=10)
br301Specs=pd.read_excel('/media/xinyi/dcis2idc/data/BR301 specs.xlsx',header=10)
br8018aSpecs=pd.read_excel('/media/xinyi/dcis2idc/data/BR8018a specs.xlsx',header=10)
br1003aSpecs.index=br1003aSpecs.loc[:,'Position']
br301Specs.index=br301Specs.loc[:,'Position']
br8018aSpecs.index=br8018aSpecs.loc[:,'Position']

progList=np.copy(allImgNames)
for s in np.unique(allImgNames):
    ssplit=s.split('_')
    if 'br1003a'==ssplit[0]:
        prog_s=br1003aSpecs.loc[(ssplit[-1],'Pathology diagnosis')]
    elif 'br301'==ssplit[0]:
        prog_s=br301Specs.loc[(ssplit[-1],'Pathology diagnosis')]
    elif 'br8018a'==ssplit[0]:
        prog_s=br8018aSpecs.loc[(ssplit[-1],'Pathology diagnosis')]
    progList[allImgNames==s]=prog_s
    


## combine subclusters to same trajectory

In [12]:
ncluster=8
n_neighbors=10
min_dist=0.25
n_pcs=50
plotepoch=ep
savenameAdd='_plottingIdx_progBalanced_'+str(0)
savenamecluster='minibatchkmean_ncluster'+str(ncluster)+'n_pcs'+str(n_pcs)+'epoch'+str(plotepoch)+savenameAdd
with open(os.path.join(plotsavepath,'cluster',savenamecluster+'_estimator'), 'rb') as output:
    kmeansestimator=pickle.load(output)

savenamepca='pca_epoch'+str(plotepoch)+savenameAdd
with open(os.path.join(plotsavepath,'cluster',savenamepca), 'rb') as output:
    pca=pickle.load(output)
    
subclusternumbers=[4,6,8,6,6,6,6,4]
kmeansestimator_sub={}
pca_sub={}
for c in range(ncluster):
    subclustersavedir=os.path.join(plotsavepath,'cluster',savenamecluster+'_subcluster'+str(c))
    savenamecluster_sub='minibatchkmean_ncluster'+str(subclusternumbers[c])+'n_pcs'+str(n_pcs)+'epoch'+str(plotepoch)+savenameAdd
    with open(os.path.join(subclustersavedir,savenamecluster_sub+'_estimator'), 'rb') as output:
        kmeansestimator_sub[c]=pickle.load(output)
    with open(os.path.join(subclustersavedir,savenamepca), 'rb') as output:
        pca_sub[c]=pickle.load(output)
        
#predict kmeans
neworder=[1, 5, 3, 7, 2, 0, 4, 6]
if not os.path.exists(clustersavedir_alltrain):
    os.mkdir(clustersavedir_alltrain)
clusterplotdir_alltrain=os.path.join(clustersavedir_alltrain,'plots')
if not os.path.exists(clusterplotdir_alltrain):
    os.mkdir(clusterplotdir_alltrain)
clusterRes=kmeansestimator.predict(pca.transform(latent)[:,:n_pcs])
clusterRes_reordered=np.zeros_like(clusterRes)
for c in range(ncluster):
    cold=neworder[c]
    clusterRes_reordered[clusterRes==cold]=c

with open(os.path.join(clustersavedir_alltrain,'minibatchkmean_ncluster'+str(ncluster)+'n_pcs'+str(n_pcs)+'epoch'+str(plotepoch)+'_all'), 'wb') as output:
    pickle.dump(clusterRes_reordered, output, pickle.HIGHEST_PROTOCOL)

gc.collect()

savenamecluster='minibatchkmean_ncluster'+str(ncluster)+'n_pcs'+str(n_pcs)+'epoch'+str(plotepoch)+savenameAdd
for cnew in np.unique(clusterRes_reordered):
    c=neworder[cnew]
    subclustersavedir_alltrain=os.path.join(clustersavedir_alltrain,savenamecluster+'_subcluster'+str(c))
    subclusterplotdir_alltrain=os.path.join(subclustersavedir_alltrain,'plots')
    if not os.path.exists(subclustersavedir_alltrain):
        os.mkdir(subclustersavedir_alltrain)
    if not os.path.exists(subclusterplotdir_alltrain):
        os.mkdir(subclusterplotdir_alltrain)
    clusterRes_sub=kmeansestimator_sub[c].predict(pca_sub[c].transform(latent[clusterRes_reordered==cnew])[:,:n_pcs])
    with open(os.path.join(subclustersavedir_alltrain,'minibatchkmean_ncluster'+str(subclusternumbers[c])+'n_pcs'+str(n_pcs)+'epoch'+str(plotepoch)+'_all'), 'wb') as output:
        pickle.dump(clusterRes_sub, output, pickle.HIGHEST_PROTOCOL)




In [9]:
with open(os.path.join(plotsavepath,'cluster_reordered','minibatchkmean_ncluster'+str(8)+'n_pcs'+str(50)+'epoch'+str(ep)+'_plottingIdx_progBalanced_'+str(0)+'_reordered'+'_subcluster_all','pagaPos_reordered'), 'rb') as output:
    trainPagaPos=pickle.load(output)

In [11]:
# savenamesample='alltrain'

# neworder=[1, 5, 3, 7, 2, 0, 4, 6]
# #use chosen subcluster number and save plots
# sc.settings.verbosity = 3
# # subcluster=8
# subclusterDict={0:[4],1:[6],2:[8],3:[6],4:[6],5:[6],6:[6],7:[4]}
# ncluster=8

# plotepoch=311
# clusterplotdir=os.path.join(clustersavedir_alltrain,'plots')
# n_pcs=50
# savenamecluster='minibatchkmean_ncluster'+str(ncluster)+'n_pcs'+str(n_pcs)+'epoch'+str(plotepoch)
# with open(os.path.join(clustersavedir_alltrain,'minibatchkmean_ncluster'+str(ncluster)+'n_pcs'+str(n_pcs)+'epoch'+str(plotepoch)+'_all'), 'rb') as output:
#     clusterRes=pickle.load(output)

# latent_adata=ad.AnnData(latent)
# kmeans_sub=(np.zeros(clusterRes.size)-1).astype(str)
# savenameAdd='_plottingIdx_progBalanced_'+str(0)
# subclusternumbers=[4,6,8,6,6,6,6,4]
# savenamecluster='minibatchkmean_ncluster'+str(ncluster)+'n_pcs'+str(n_pcs)+'epoch'+str(plotepoch)+savenameAdd
# for cnew in np.unique(clusterRes):
# #     print('cluster'+str(c))
#     c=neworder[cnew]
    
#     subclustersavedir_alltrain=os.path.join(clustersavedir_alltrain,savenamecluster+'_subcluster'+str(c))
#     with open(os.path.join(subclustersavedir_alltrain,'minibatchkmean_ncluster'+str(subclusternumbers[c])+'n_pcs'+str(n_pcs)+'epoch'+str(plotepoch)+'_all'), 'rb') as output:
#         subclusterRes=pickle.load(output)
#     print(np.unique(subclusterRes))
#     kmeans_sub[clusterRes==cnew]=np.char.add(np.repeat(str(cnew)+'-',subclusterRes.size),subclusterRes.astype(str))
        
# latent_adata.obs['kmeans_sub']=kmeans_sub
# latent_adata.obs['kmeans_sub']=latent_adata.obs['kmeans_sub'].astype('category')
# latent_adata.obs['kmeans']=clusterRes.astype(str)
# latent_adata.obs['kmeans']=latent_adata.obs['kmeans'].astype('category')


# subclusterplotdir=os.path.join(clustersavedir_alltrain,savenamecluster+'_subcluster_all')
# if not os.path.exists(subclusterplotdir):
#     os.mkdir(subclusterplotdir)


# sc.settings.figdir=os.path.join(subclusterplotdir)

np.random.seed(3)
sc.tl.pca(latent_adata, svd_solver='arpack')
sc.pp.neighbors(latent_adata, n_neighbors=4, n_pcs=20)
sc.tl.paga(latent_adata, groups='kmeans_sub')
plt.rcParams["figure.figsize"] = (10, 10)
plt.rcParams['figure.dpi'] = 1200
# # sc.pl.paga(latent_adata, color=['kmeans'],save='graph.png') #default threshold is 0.01
sc.pl.paga(latent_adata, color=['kmeans'],pos=trainPagaPos,fontsize=18,fontoutline=1,max_edge_width=10, node_size_scale=5,node_size_power=1,save='graph_thresh001.pdf',threshold=0.01,show=False,random_state=6)
# sc.tl.umap(latent_adata, init_pos='paga')
# sc.pl.umap(latent_adata,color=['kmeans'],legend_loc='on data',save='graph_umap.pdf',show=False)





computing PCA
    with n_comps=50
    finished (0:03:14)
computing neighbors
    using 'X_pca' with n_pcs = 20
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:01:37)
running PAGA
    finished: added
    'paga/connectivities', connectivities adjacency (adata.uns)
    'paga/connectivities_tree', connectivities subtree (adata.uns) (0:02:07)
--> added 'pos', the PAGA positions (adata.uns['paga'])
computing UMAP


KeyboardInterrupt: 

## color gradient by proteins

In [12]:
with open(os.path.join(datadir,'processed','train_cnnvae_cellLabels'), 'rb') as output: 
    cellIDlist=pickle.load(output)

In [13]:
stats2plot=['int_median','int_mode','int_mean']
stains=['aSMA','cytokeratin','collagen1','gh2ax']
uniquenames,nameIdx=np.unique(allImgNames,return_index=True)

ncluster=8
plotepoch=311
    
allstats=None
allstats_nmco=None
alllabels=None
allvarnames=None

minMinDict={'aSMA':[],'cytokeratin':[],'collagen1':[],'gh2ax':[]}

allidx=None
idx_temp=np.arange(allImgNames.size)
for sidx in range(uniquenames.size):
    s=np.unique(allImgNames)[sidx]
    plottingIdx_i_s=np.arange(allImgNames.size)[allImgNames==s]-nameIdx[sidx]

    print(s)
    assert np.min(plottingIdx_i_s)>=0
#         ssplit=s.split('_')
    slabels=kmeans_sub[allImgNames==s]
    sidx=idx_temp[allImgNames==s]
    
    for stain_s in stains:
        if not stain_s in s:
            continue
        path_s=os.path.join(datadir,'_'.join(s.split('_')[:-1]),'cellular_'+stain_s.lower(),s.split('_')[-1] +'.csv')
        if not os.path.exists(path_s):
            continue
        print(stain_s)
        path_s_nmco=os.path.join(datadir,'_'.join(s.split('_')[:-1]),'nmco_features',s.split('_')[-1] +'.csv')
        
        stats_s=pd.read_csv(path_s)
        stats_s.index=stats_s.loc[:,'label']
                                                
        stats_s_nmco=pd.read_csv(path_s_nmco)
        stats_s_nmco.index=stats_s.loc[:,'label']
        stats_s_nmco=stats_s_nmco.loc[cellIDlist[s][plottingIdx_i_s],'area'].to_numpy()
        
        minMin=np.min(stats_s.loc[:,'int_min'])
        print(minMin)
        
        minMinDict[stain_s].append(minMin)
        if allstats is None:
            allstats=stats_s.loc[cellIDlist[s][plottingIdx_i_s],stats2plot].to_numpy()-minMin
            alllabels=np.copy(slabels)
            allvarnames=np.repeat(stain_s,slabels.size)
            allidx=np.copy(sidx)
            
            allstats_nmco=stats_s_nmco
        else:
            allstats=np.concatenate((allstats,stats_s.loc[cellIDlist[s][plottingIdx_i_s],stats2plot].to_numpy()-minMin),axis=0)
            alllabels=np.concatenate((alllabels,np.copy(slabels)))
            allvarnames=np.concatenate((allvarnames,np.repeat(stain_s,slabels.size)))
            allidx=np.concatenate((allidx,sidx))
            allstats_nmco=np.concatenate((allstats_nmco,stats_s_nmco))
            
            

br1003a_1_cytokeratin_555_aSMA_647_hoechst_A1
aSMA
686.0
cytokeratin
1532.0
br1003a_1_cytokeratin_555_aSMA_647_hoechst_A2
aSMA
682.0
cytokeratin
1804.0
br1003a_1_cytokeratin_555_aSMA_647_hoechst_A4
aSMA
641.0
cytokeratin
1870.0
br1003a_1_cytokeratin_555_aSMA_647_hoechst_A5
aSMA
644.0
cytokeratin
1875.0
br1003a_1_cytokeratin_555_aSMA_647_hoechst_A6
aSMA
618.0
cytokeratin
2053.0
br1003a_1_cytokeratin_555_aSMA_647_hoechst_A7
aSMA
656.0
cytokeratin
2074.0
br1003a_1_cytokeratin_555_aSMA_647_hoechst_A8
aSMA
632.0
cytokeratin
2089.0
br1003a_1_cytokeratin_555_aSMA_647_hoechst_A9
aSMA
674.0
cytokeratin
2265.0
br1003a_1_cytokeratin_555_aSMA_647_hoechst_C1
aSMA
513.0
cytokeratin
2245.0
br1003a_1_cytokeratin_555_aSMA_647_hoechst_C10
aSMA
503.0
cytokeratin
735.0
br1003a_1_cytokeratin_555_aSMA_647_hoechst_C2
aSMA
585.0
cytokeratin
2405.0
br1003a_1_cytokeratin_555_aSMA_647_hoechst_C3
aSMA
566.0
cytokeratin
2311.0
br1003a_1_cytokeratin_555_aSMA_647_hoechst_C4
aSMA
499.0
cytokeratin
2413.0
br1003a_1_cy

420.0
br301_6_collagen1_647_hoechst_D2
collagen1
376.0
br301_6_collagen1_647_hoechst_D3
collagen1
369.0
br301_6_collagen1_647_hoechst_D4
collagen1
202.0
br301_6_collagen1_647_hoechst_D5
collagen1
1188.0
br301_6_collagen1_647_hoechst_D6
collagen1
1027.0
br301_6_collagen1_647_hoechst_E1
collagen1
524.0
br301_6_collagen1_647_hoechst_E2
collagen1
711.0
br301_6_collagen1_647_hoechst_E3
collagen1
570.0
br301_6_collagen1_647_hoechst_E4
collagen1
188.0
br301_6_collagen1_647_hoechst_E5
collagen1
689.0
br301_6_collagen1_647_hoechst_E6
collagen1
1423.0
br8018a_1_cytokeratin_555_aSMA_647_hoechst_A1
aSMA
674.0
cytokeratin
1862.0
br8018a_1_cytokeratin_555_aSMA_647_hoechst_A10
aSMA
692.0
cytokeratin
1517.0
br8018a_1_cytokeratin_555_aSMA_647_hoechst_A2
aSMA
680.0
cytokeratin
1702.0
br8018a_1_cytokeratin_555_aSMA_647_hoechst_A3
aSMA
694.0
cytokeratin
1631.0
br8018a_1_cytokeratin_555_aSMA_647_hoechst_A4
aSMA
1145.0
cytokeratin
2565.0
br8018a_1_cytokeratin_555_aSMA_647_hoechst_A5
aSMA
688.0
cytokeratin
1

### threshold protein

In [14]:
minCell=5

In [15]:
allstatslog_std1=np.log(allstats[:,2]+1)
for v in np.unique(allvarnames):
    meanProt=np.mean(np.log(allstats[allvarnames==v,2]+1)) #mean
    stdProt=np.std(np.log(allstats[allvarnames==v,2]+1))
    zeroIdx=np.arange(allvarnames.shape[0])[allvarnames==v][allstatslog_std1[allvarnames==v]<(meanProt+stdProt)]
    allstatslog_std1[zeroIdx]=0
    
    plotidx=allidx[allvarnames==v]
    protvalues=np.repeat(np.nan,latent_adata.shape[0])
    protvalues[plotidx]=allstatslog_std1[allvarnames==v]
    latent_adata.obs[v+'log']=protvalues

In [193]:
from util.plot_new import *

In [60]:
plotembeddingbyCT(np.array(latent_adata.obs['kmeans']).astype(int),'asma_cytokeratin_mean_std1',[],latent_adata.obs.loc[:,('aSMAlog','cytokeratinlog')].to_numpy(),subclusterplotdir,'asma vs cytokeratin mean',plotdimx=0,plotdimy=1,savenameAdd='',s=0.1)

In [18]:
#add single cell plots & use all cells -- use previously calculated graph in combined subcluster (neighbors=4)
#log scale
#thresh=1
# nneighbors_list=[4,10] 
# sc.tl.umap(latent_adata, init_pos='paga')
for v in np.unique(allvarnames):
#     plotidx=allidx[allvarnames==v]
#     protvalues=np.repeat(np.nan,latent_adata.shape[0])
#     protvalues[plotidx]=np.log(allstats[allvarnames==v,0]+1)
#     latent_adata.obs[v+'log']=protvalues

    sc.settings.figdir=os.path.join(subclusterplotdir)
    
    latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v+'log'])]
    latent_adata_sub.uns['kmeans_sub_sizes']=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)[1]
    sc.pl.paga(latent_adata_sub, color=[v+'log'],pos=trainPagaPos,fontsize=18,fontoutline=1,max_edge_width=10, node_size_scale=5,node_size_power=1,save='graph'+v+'_allcells_log.pdf',show=False,random_state=6)
#     sc.pl.umap(latent_adata_sub,color=[v+'log'],legend_loc='on data',save='graph_umap'+v+'_log.png',show=False,size=3)

gc.collect() 



  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


64837

In [228]:
for v in np.unique(allvarnames):
    protvalues=latent_adata.obs[v+'log'].to_numpy(copy=True)
    protvalues[protvalues>0]=1
    print(protvalues)



[ 0.  0.  1. ... nan nan nan]
[0. 1. 1. ... 0. 0. 0.]


In [188]:
node_size_scale#%cells with positive expression

for v in np.unique(allvarnames):
    protvalues=latent_adata.obs[v+'log'].to_numpy(copy=True)
    protvalues[protvalues>0]=1
    latent_adata.obs[v+'binary']=protvalues

    sc.settings.figdir=os.path.join(subclusterplotdir)
    
    latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v+'log'])]
    latent_adata_sub.uns['kmeans_sub_sizes']=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)[1]
    sc.pl.paga(latent_adata_sub, color=[v+'binary'],pos=trainPagaPos,node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells_PosFrac.png',show=False,random_state=6)
gc.collect()

In [210]:
#by pathology; exprs and %positive
#changed paga plotting function to enable vmax and vmin
#add single cell plots & use all cells -- use previously calculated graph in combined subcluster (neighbors=4)
#log scale
#thresh=1
# nneighbors_list=[4,10] 
latent_adata.obs['prog']=progList
vminlist={'aSMA':0,'collagen1':0,'cytokeratin':0,'gh2ax':0}
vmaxlist={'aSMA':3.5,'collagen1':7.5,'cytokeratin':5.5,'gh2ax':4.6}
vminlistFrac={'aSMA':0,'collagen1':0,'cytokeratin':0,'gh2ax':0}
vmaxlistFrac={'aSMA':0.41,'collagen1':0.9,'cytokeratin':0.65,'gh2ax':0.6}
# for v in np.unique(allvarnames):
for v in vminlist.keys():
    vmin=vminlist[v]
    vmax=vmaxlist[v]
    vminFrac=vminlistFrac[v]
    vmaxFrac=vmaxlistFrac[v]
    for prog in np.unique(progList[np.isfinite(latent_adata.obs[v+'log'])]):
        vprog=v+'_log_'+prog
        
        if not os.path.exists(os.path.join(subclusterplotdir,v)):
            os.mkdir(os.path.join(subclusterplotdir,v))
        sc.settings.figdir=os.path.join(subclusterplotdir,v)
        
        latent_adata_sub=latent_adata[np.logical_and(np.isfinite(latent_adata.obs[v+'log'].to_numpy()),progList==prog)]
        idxKeep=np.repeat(False,latent_adata_sub.shape[0])
        for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
            if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
                idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
        latent_adata_sub=latent_adata_sub[idxKeep]
#         subsizes=np.zeros(np.unique(kmeans_sub).size)
        nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
#         for sidx in range(nonzeroSub.size):
#             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
        latent_adata_sub.uns['kmeans_sub_sizes']=counts
        _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
        latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]
        
        sc.pl.paga(latent_adata_sub, color=[v+'log'],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+vprog+'_allcells.png',show=False,random_state=6,vmin=vmin,vmax=vmax,cmap='jet')
#         sc.tl.umap(latent_adata, init_pos='paga')
        sc.pl.umap(latent_adata_sub,color=[v+'log'],legend_loc='on data',save='graph_umap'+vprog+'.png',show=False,size=3,vmin=vmin,vmax=vmax,cmap='jet')

        sc.pl.paga(latent_adata_sub, color=[v+'binary'],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+vprog+'_allcells_PosFrac.png',show=False,random_state=6,vmin=vminFrac,vmax=vmaxFrac,cmap='jet')



  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


In [211]:
latent_adata_sub=None
gc.collect()

760395

##### separate protein by duct

#### add distance to duct - 0 is in duct

In [19]:
import sklearn.metrics 
with open(os.path.join(datadir,'processed','train_cnnvae_coord'), 'rb') as output:
    coordlist=pickle.load(output)
    
#distance to nearest cell in duct
uniquenames,nameIdx=np.unique(allImgNames,return_index=True)

allstats=None
alllabels=None
allprog=None

allidx=None
idx_temp=np.arange(allImgNames.size)
for sidx in range(uniquenames.size):
    s=np.unique(allImgNames)[sidx]
    
    path_s=os.path.join(datadir,'_'.join(s.split('_')[:-1]),'duct_nuc_membership',s.split('_')[-1] +'.csv')
    if not os.path.exists(path_s):
        continue
    print(s)
    
    plottingIdx_i_s=np.arange(allImgNames.size)[allImgNames==s]-nameIdx[sidx]
    assert np.min(plottingIdx_i_s)>=0
#         ssplit=s.split('_')
    slabels=kmeans_sub[allImgNames==s]
    
    
    sidx_plot=idx_temp[allImgNames==s]

    if plottingIdx_i_s.size==0:
        continue
    assert np.min(plottingIdx_i_s)>=0

    stats_s_all=pd.read_csv(path_s)
    stats_s_all.index=stats_s_all.loc[:,'label']
    stats_s=stats_s_all.loc[cellIDlist[s][plottingIdx_i_s],'int_median'].to_numpy()

    stats_s_all=stats_s_all.loc[cellIDlist[s],'int_median'].to_numpy()
    scoord_all_duct=coordlist[allImgNames==s][stats_s_all>0]

#         ssplit=s.split('_')
    sprog=progList[allImgNames==s]


    scoord=coordlist[allImgNames==s][stats_s==0]

    _,distS=sklearn.metrics.pairwise_distances_argmin_min(scoord,scoord_all_duct)
    distAll=np.zeros(stats_s.shape[0])
    distAll[stats_s==0]=distS
#     distances=np.concatenate((distances,distS))
#     sublabels=np.concatenate((sublabels,slabels[stats_s==0]))
#     subProgs=np.concatenate((subProgs,sprog[stats_s==0]))

    if allstats is None:
        allstats=np.copy(distAll)
        alllabels=np.copy(slabels)
        allprog=np.copy(sprog)
        allidx=np.copy(sidx_plot)
    else:
        allstats=np.concatenate((allstats,distAll))
        alllabels=np.concatenate((alllabels,np.copy(slabels)))
        allprog=np.concatenate((allprog,sprog))
        allidx=np.concatenate((allidx,sidx_plot))
            
            

br1003a_1_cytokeratin_555_aSMA_647_hoechst_I2
br1003a_1_cytokeratin_555_aSMA_647_hoechst_I3
br1003a_1_cytokeratin_555_aSMA_647_hoechst_I7
br1003a_1_cytokeratin_555_aSMA_647_hoechst_I8
br1003a_1_cytokeratin_555_aSMA_647_hoechst_I9
br301_4_cytokeratin_555_aSMA_647_hoechst_A1
br301_4_cytokeratin_555_aSMA_647_hoechst_A2
br301_4_cytokeratin_555_aSMA_647_hoechst_A3
br301_4_cytokeratin_555_aSMA_647_hoechst_A4
br301_4_cytokeratin_555_aSMA_647_hoechst_A5
br301_4_cytokeratin_555_aSMA_647_hoechst_A6
br301_4_cytokeratin_555_aSMA_647_hoechst_B1
br301_4_cytokeratin_555_aSMA_647_hoechst_B2
br301_4_cytokeratin_555_aSMA_647_hoechst_B3
br301_4_cytokeratin_555_aSMA_647_hoechst_B4
br301_4_cytokeratin_555_aSMA_647_hoechst_B5
br301_4_cytokeratin_555_aSMA_647_hoechst_B6
br301_4_cytokeratin_555_aSMA_647_hoechst_C1
br301_4_cytokeratin_555_aSMA_647_hoechst_C2
br301_4_cytokeratin_555_aSMA_647_hoechst_C3
br301_4_cytokeratin_555_aSMA_647_hoechst_C4
br301_4_cytokeratin_555_aSMA_647_hoechst_C5
br301_4_cytokeratin_55

In [20]:
v='dist2duct_log'
protvalues=np.repeat(np.nan,latent_adata.shape[0])
protvalues[allidx]=np.log(allstats+1)
latent_adata.obs[v]=protvalues

v='outDuct'
protvalues=np.repeat(np.nan,latent_adata.shape[0])
allstats_binary=np.zeros(allstats.size)
allstats_binary[allstats>0]=1
protvalues[allidx]=allstats_binary
latent_adata.obs[v]=protvalues

v='dist2duct_log_outOnly'
protvalues=np.repeat(np.nan,latent_adata.shape[0])
protvalues[allidx[allstats>0]]=np.log(allstats[allstats>0]+1)
latent_adata.obs[v]=protvalues

In [213]:
#protein by pathology -- separate by in vs out of duct
#changed paga plotting function to enable vmax and vmin
#add single cell plots & use all cells -- use previously calculated graph in combined subcluster (neighbors=4)
#log scale
#thresh=1
# nneighbors_list=[4,10] 
vminlist={'aSMA':0,'collagen1':0,'cytokeratin':0,'gh2ax':0}
vmaxlist={'aSMA':3.5,'collagen1':7.5,'cytokeratin':5.5,'gh2ax':4.6}
vminlistFrac={'aSMA':0,'collagen1':0,'cytokeratin':0,'gh2ax':0}
vmaxlistFrac={'aSMA':0.41,'collagen1':0.9,'cytokeratin':0.65,'gh2ax':0.6}
for v in vminlist.keys():
    vmin=vminlist[v]
    vmax=vmaxlist[v]
    vminFrac=vminlistFrac[v]
    vmaxFrac=vmaxlistFrac[v]
    for prog in np.unique(progList[np.isfinite(latent_adata.obs[v+'log'])]):
        vprog=v+'_log_'+prog+'_induct'
        vprog_bin=v+'_binary_'+prog+'_induct'
        protvalues=latent_adata.obs[v+'log'].to_numpy(copy=True)
        protvalues[latent_adata.obs['outDuct']!=0]=np.nan
        protvalues[latent_adata.obs['prog']!=prog]=np.nan
        latent_adata.obs[vprog]=np.copy(protvalues)
        protvalues[protvalues>0]=1
        latent_adata.obs[vprog_bin]=np.copy(protvalues)

        if not os.path.exists(os.path.join(subclusterplotdir,v,'duct')):
            os.mkdir(os.path.join(subclusterplotdir,v,'duct'))
        sc.settings.figdir=os.path.join(subclusterplotdir,v,'duct')

        latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[vprog].to_numpy())]
        idxKeep=np.repeat(False,latent_adata_sub.shape[0])
        for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
            if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
                idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
        latent_adata_sub=latent_adata_sub[idxKeep]
        if latent_adata_sub.shape[0]>0:
    #         subsizes=np.zeros(np.unique(kmeans_sub).size)
            nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
    #         for sidx in range(nonzeroSub.size):
    #             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
            latent_adata_sub.uns['kmeans_sub_sizes']=counts
            _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
            latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]

            sc.pl.paga(latent_adata_sub, color=[vprog],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+vprog+'_allcells.png',show=False,random_state=6,vmin=vmin,vmax=vmax,cmap='jet')
            sc.pl.umap(latent_adata_sub,color=[vprog],legend_loc='on data',save='graph_umap'+vprog+'.png',show=False,size=3,vmin=vmin,vmax=vmax,cmap='jet')
            sc.pl.paga(latent_adata_sub, color=[vprog_bin],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+vprog+'_allcells_PosFrac.png',show=False,random_state=6,vmin=vminFrac,vmax=vmaxFrac,cmap='jet')


        
        vprog=v+'_log_'+prog+'_outduct'
        vprog_bin=v+'_binary_'+prog+'_outduct'
        protvalues=latent_adata.obs[v+'log'].to_numpy(copy=True)
        protvalues[latent_adata.obs['outDuct']!=1]=np.nan
        protvalues[latent_adata.obs['prog']!=prog]=np.nan
        latent_adata.obs[vprog]=np.copy(protvalues)
        protvalues[protvalues>0]=1
        latent_adata.obs[vprog_bin]=np.copy(protvalues)
        
        latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[vprog].to_numpy())]
        idxKeep=np.repeat(False,latent_adata_sub.shape[0])
        for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
            if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
                idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
        latent_adata_sub=latent_adata_sub[idxKeep]
        if latent_adata_sub.shape[0]>0:
    #         subsizes=np.zeros(np.unique(kmeans_sub).size)
            nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
    #         for sidx in range(nonzeroSub.size):
    #             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
            latent_adata_sub.uns['kmeans_sub_sizes']=counts
            _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
            latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]

            sc.settings.figdir=os.path.join(subclusterplotdir,v,'duct')

            sc.pl.paga(latent_adata_sub, color=[vprog],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+vprog+'_allcells.png',show=False,random_state=6,vmin=vmin,vmax=vmax,cmap='jet')
            sc.pl.umap(latent_adata_sub,color=[vprog],legend_loc='on data',save='graph_umap'+vprog+'.png',show=False,size=3,vmin=vmin,vmax=vmax,cmap='jet')
            sc.pl.paga(latent_adata_sub, color=[vprog_bin],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+vprog+'_allcells_PosFrac.png',show=False,random_state=6,vmin=vminFrac,vmax=vmaxFrac,cmap='jet')




  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


#### plot distance

In [25]:
#add single cell plots & use all cells -- use previously calculated graph in combined subcluster (neighbors=4)
v='dist2duct_log'
sc.settings.figdir=os.path.join(subclusterplotdir)
sc.pl.paga(latent_adata, color=[v],pos=trainPagaPos,vmin=0.9,vmax=5.1,fontsize=18,fontoutline=1,max_edge_width=10, node_size_scale=5,node_size_power=1,save='graph'+v+'_allcells.pdf',show=False,random_state=6,cmap='jet')
# # sc.tl.umap(latent_adata, init_pos='paga')
# sc.pl.umap(latent_adata,color=[v],legend_loc='on data',save='graph_umap'+v+'.png',show=False,size=3)




--> added 'pos', the PAGA positions (adata.uns['paga'])


<Axes:>

In [30]:
allImgNames.size

1876734

In [26]:
#by prog
latent_adata.obs['prog']=progList
# vmin=0
# vmax=np.max(latent_adata.obs['dist2duct_log'])
vmin=0.9
vmax=5.1
for prog in np.unique(progList[np.isfinite(latent_adata.obs['dist2duct_log'])]):
    v='dist2duct_log_'+prog
    protvalues=np.repeat(np.nan,latent_adata.shape[0])
    protvalues[allidx]=np.log(allstats+1)
    protvalues[latent_adata.obs['prog']!=prog]=np.nan
    latent_adata.obs[v]=protvalues
    sc.settings.figdir=os.path.join(subclusterplotdir)
    
    latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v].to_numpy())]
    idxKeep=np.repeat(False,latent_adata_sub.shape[0])
    for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
        if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
            idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
    latent_adata_sub=latent_adata_sub[idxKeep]
    if latent_adata_sub.shape[0]>0:
        print(v)
#         subsizes=np.zeros(np.unique(kmeans_sub).size)
        nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
#         for sidx in range(nonzeroSub.size):
#             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
        latent_adata_sub.uns['kmeans_sub_sizes']=counts
        _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
        latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]



#         sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6,cmap='jet')
        sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],fontsize=18,fontoutline=1,max_edge_width=10, node_size_scale=5,node_size_power=1,save='graph'+v+'_allcells.pdf',show=False,random_state=6,vmin=vmin,vmax=vmax,cmap='jet')
#     #     sc.tl.umap(latent_adata, init_pos='paga')
#         sc.pl.umap(latent_adata_sub,color=[v],legend_loc='on data',save='graph_umap'+v+'.png',show=False,size=3,vmin=vmin,vmax=vmax)




dist2duct_log_Breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ with early infiltratio


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Invasive ductal carcinoma


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Invasive ductal carcinoma and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Micropapillary type ductal carcinoma in situ wi


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


In [33]:
subclusterplotdir

'/media/xinyi/dcis2idc/plots/cnnvaeexp0/cluster_alltrain_reordered/minibatchkmean_ncluster8n_pcs50epoch311_plottingIdx_progBalanced_0_subcluster_all'

In [40]:
#by sample 
latent_adata.obs['prog']=progList
latent_adata.obs['samplename']=allImgNames
# vmin=0
# vmax=np.max(latent_adata.obs['dist2duct_log'])
vmin=0.9
vmax=5.1
# for prog in np.unique(progList[np.isfinite(latent_adata.obs['dist2duct_log'])]):
subclusterplotdir_samples=os.path.join(subclusterplotdir,'dist2duct_bySample')
if not os.path.exists(subclusterplotdir_samples):
    os.mkdir(subclusterplotdir_samples)
for s in np.unique(allImgNames[np.isfinite(latent_adata.obs['dist2duct_log'])]):
    prog=np.unique(progList[allImgNames==s])[0]
    v='dist2duct_log_'+prog+'_'+s
    protvalues=np.repeat(np.nan,latent_adata.shape[0])
    protvalues[allidx]=np.log(allstats+1)
    protvalues[latent_adata.obs['samplename']!=s]=np.nan
    latent_adata.obs[v]=protvalues
    sc.settings.figdir=os.path.join(subclusterplotdir_samples)
    
    latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v].to_numpy())]
    idxKeep=np.repeat(False,latent_adata_sub.shape[0])
    for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
        if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
            idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
    latent_adata_sub=latent_adata_sub[idxKeep]
    if latent_adata_sub.shape[0]>0:
        print(v)
#         subsizes=np.zeros(np.unique(kmeans_sub).size)
        nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
#         for sidx in range(nonzeroSub.size):
#             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
        latent_adata_sub.uns['kmeans_sub_sizes']=counts
        _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
        latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]



#         sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6,cmap='jet')
        sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],fontsize=18,fontoutline=1,max_edge_width=10, node_size_scale=5,node_size_power=1,save='graph'+v+'_allcells.pdf',show=False,random_state=6,vmin=vmin,vmax=vmax,cmap='jet')
#     #     sc.tl.umap(latent_adata, init_pos='paga')
#         sc.pl.umap(latent_adata_sub,color=[v],legend_loc='on data',save='graph_umap'+v+'.png',show=False,size=3,vmin=vmin,vmax=vmax)




dist2duct_log_Breast tissue_br1003a_1_cytokeratin_555_aSMA_647_hoechst_I2


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Breast tissue_br1003a_1_cytokeratin_555_aSMA_647_hoechst_I3


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Breast tissue_br1003a_1_cytokeratin_555_aSMA_647_hoechst_I7


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Breast tissue_br1003a_1_cytokeratin_555_aSMA_647_hoechst_I8


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Breast tissue_br1003a_1_cytokeratin_555_aSMA_647_hoechst_I9


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ and breast tissue_br301_4_cytokeratin_555_aSMA_647_hoechst_A1


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ with early infiltratio_br301_4_cytokeratin_555_aSMA_647_hoechst_A2


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ_br301_4_cytokeratin_555_aSMA_647_hoechst_A3


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ_br301_4_cytokeratin_555_aSMA_647_hoechst_A4


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ and breast tissue_br301_4_cytokeratin_555_aSMA_647_hoechst_A5


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ and breast tissue_br301_4_cytokeratin_555_aSMA_647_hoechst_A6


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Invasive ductal carcinoma_br301_4_cytokeratin_555_aSMA_647_hoechst_B1


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ and breast tissue_br301_4_cytokeratin_555_aSMA_647_hoechst_B2


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ and breast tissue_br301_4_cytokeratin_555_aSMA_647_hoechst_B3


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ and breast tissue_br301_4_cytokeratin_555_aSMA_647_hoechst_B4


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Invasive ductal carcinoma and breast tissue_br301_4_cytokeratin_555_aSMA_647_hoechst_B5


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Invasive ductal carcinoma and breast tissue_br301_4_cytokeratin_555_aSMA_647_hoechst_B6


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ with early infiltratio_br301_4_cytokeratin_555_aSMA_647_hoechst_C1


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ with early infiltratio_br301_4_cytokeratin_555_aSMA_647_hoechst_C2


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Micropapillary type ductal carcinoma in situ wi_br301_4_cytokeratin_555_aSMA_647_hoechst_C3


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Micropapillary type ductal carcinoma in situ wi_br301_4_cytokeratin_555_aSMA_647_hoechst_C4


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ with early infiltratio_br301_4_cytokeratin_555_aSMA_647_hoechst_C5


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ with early infiltratio_br301_4_cytokeratin_555_aSMA_647_hoechst_C6


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ with early infiltratio_br301_4_cytokeratin_555_aSMA_647_hoechst_D1


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ with early infiltratio_br301_4_cytokeratin_555_aSMA_647_hoechst_D2


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Invasive ductal carcinoma and breast tissue_br301_4_cytokeratin_555_aSMA_647_hoechst_D3


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Invasive ductal carcinoma_br301_4_cytokeratin_555_aSMA_647_hoechst_D4


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ with early infiltratio_br301_4_cytokeratin_555_aSMA_647_hoechst_D5


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ with early infiltratio_br301_4_cytokeratin_555_aSMA_647_hoechst_D6


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ with early infiltratio_br301_4_cytokeratin_555_aSMA_647_hoechst_E1


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ with early infiltratio_br301_4_cytokeratin_555_aSMA_647_hoechst_E2


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Invasive ductal carcinoma and breast tissue_br301_4_cytokeratin_555_aSMA_647_hoechst_E3


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Invasive ductal carcinoma_br301_4_cytokeratin_555_aSMA_647_hoechst_E4


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ with early infiltratio_br301_4_cytokeratin_555_aSMA_647_hoechst_E5


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ with early infiltratio_br301_4_cytokeratin_555_aSMA_647_hoechst_E6


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Invasive ductal carcinoma_br8018a_1_cytokeratin_555_aSMA_647_hoechst_F2


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Invasive ductal carcinoma_br8018a_1_cytokeratin_555_aSMA_647_hoechst_F7


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


In [216]:
#%cells out of duct
#add single cell plots & use all cells -- use previously calculated graph in combined subcluster (neighbors=4)
v='outDuct'
sc.settings.figdir=os.path.join(subclusterplotdir)

sc.pl.paga(latent_adata,pos=trainPagaPos,node_size_power=1,fontoutline=0.5, color=[v],save='graph'+v+'_allcells.png',show=False,random_state=6)



--> added 'pos', the PAGA positions (adata.uns['paga'])


<Axes:>

In [217]:
# %cells out of duct - by prog
for prog in np.unique(progList[np.isfinite(latent_adata.obs['dist2duct_log'])]):
    v='outDuct_'+prog
    protvalues=np.repeat(np.nan,latent_adata.shape[0])
    protvalues[allidx]=allstats_binary
    protvalues[latent_adata.obs['prog']!=prog]=np.nan
    latent_adata.obs[v]=protvalues
    sc.settings.figdir=os.path.join(subclusterplotdir)

    latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v].to_numpy())]
    idxKeep=np.repeat(False,latent_adata_sub.shape[0])
    for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
        if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
            idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
    latent_adata_sub=latent_adata_sub[idxKeep]
    if latent_adata_sub.shape[0]>0:
        print(v)
#         subsizes=np.zeros(np.unique(kmeans_sub).size)
        nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
#         for sidx in range(nonzeroSub.size):
#             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
        latent_adata_sub.uns['kmeans_sub_sizes']=counts
        _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
        latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]


        sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6,cmap='jet')




outDuct_Breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situ


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situ and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situ with early infiltratio


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Invasive ductal carcinoma


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Invasive ductal carcinoma and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Micropapillary type ductal carcinoma in situ wi


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


In [21]:
#distance excluding cells in duct

v='dist2duct_log_outOnly'
sc.settings.figdir=os.path.join(subclusterplotdir)

sc.pl.paga(latent_adata, color=[v],pos=trainPagaPos,fontsize=18,fontoutline=1,max_edge_width=10, node_size_scale=5,node_size_power=1,save='graph'+v+'_allcells.pdf',show=False,random_state=6)
# # sc.tl.umap(latent_adata, init_pos='paga')
# sc.pl.umap(latent_adata,color=[v],legend_loc='on data',save='graph_umap'+v+'.png',show=False,size=3)




--> added 'pos', the PAGA positions (adata.uns['paga'])


<Axes:>

In [219]:
#distance excluding cells in duct - by prog
vmin=0
vmax=np.max(latent_adata.obs['dist2duct_log_outOnly'])
for prog in np.unique(progList[np.isfinite(latent_adata.obs['dist2duct_log'])]):
    v='dist2duct_log_outOnly_'+prog
    protvalues=np.repeat(np.nan,latent_adata.shape[0])
    protvalues[allidx[allstats>0]]=np.log(allstats[allstats>0]+1)
    protvalues[latent_adata.obs['prog']!=prog]=np.nan
    latent_adata.obs[v]=protvalues
    sc.settings.figdir=os.path.join(subclusterplotdir)

    latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v].to_numpy())]
    idxKeep=np.repeat(False,latent_adata_sub.shape[0])
    for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
        if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
            idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
    latent_adata_sub=latent_adata_sub[idxKeep]
    if latent_adata_sub.shape[0]>0:
        print(v)
#         subsizes=np.zeros(np.unique(kmeans_sub).size)
        nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
#         for sidx in range(nonzeroSub.size):
#             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
        latent_adata_sub.uns['kmeans_sub_sizes']=counts
        _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
        latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]



        sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6,cmap='jet')
    #     sc.tl.umap(latent_adata, init_pos='paga')
        sc.pl.umap(latent_adata_sub,color=[v],legend_loc='on data',save='graph_umap'+v+'.png',show=False,size=3,vmin=vmin,vmax=vmax)





dist2duct_log_outOnly_Breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_Ductal carcinoma in situ


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_Ductal carcinoma in situ and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_Ductal carcinoma in situ with early infiltratio


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_Invasive ductal carcinoma


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_Invasive ductal carcinoma and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_Micropapillary type ductal carcinoma in situ wi


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


In [220]:
#%cells within distance threshold of ducts normalized by total outside of ducts - by prog
latent_adata.obs['prog']=progList
distanceThresh=[radius*2*2,radius*2*5, radius*2*10]

if not os.path.exists(os.path.join(subclusterplotdir,'distThresh')):
    os.mkdir(os.path.join(subclusterplotdir,'distThresh'))
for d in distanceThresh:
    for prog in np.unique(progList):
        v='distThresh_'+prog+'_'+str(d)
        protvalues=np.repeat(np.nan,latent_adata.shape[0])
        protvalues[allidx[allstats>0]]=1
        protvalues[allidx[allstats>d]]=0
        protvalues[latent_adata.obs['prog']!=prog]=np.nan
        latent_adata.obs[v]=protvalues
        sc.settings.figdir=os.path.join(subclusterplotdir,'distThresh')
        
        latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v].to_numpy())]
        idxKeep=np.repeat(False,latent_adata_sub.shape[0])
        for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
            if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
                idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
        latent_adata_sub=latent_adata_sub[idxKeep]
        if latent_adata_sub.shape[0]>0:
            print(v)
    #         subsizes=np.zeros(np.unique(kmeans_sub).size)
            nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
    #         for sidx in range(nonzeroSub.size):
    #             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
            latent_adata_sub.uns['kmeans_sub_sizes']=counts
            _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
            latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]



            sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6,cmap='jet')


distThresh_Breast tissue_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ and breast tissue_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ with early infiltratio_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Invasive ductal carcinoma_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Invasive ductal carcinoma and breast tissue_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Micropapillary type ductal carcinoma in situ wi_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Breast tissue_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ and breast tissue_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ with early infiltratio_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Invasive ductal carcinoma_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Invasive ductal carcinoma and breast tissue_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Micropapillary type ductal carcinoma in situ wi_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Breast tissue_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ and breast tissue_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ with early infiltratio_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Invasive ductal carcinoma_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Invasive ductal carcinoma and breast tissue_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Micropapillary type ductal carcinoma in situ wi_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


### plot distance of protein positive cells

In [223]:
# %cells out of duct - by prog
for prog in np.unique(progList[np.isfinite(latent_adata.obs['dist2duct_log'])]):
    for prot in vminlist.keys():
        v='outDuct_'+prog+prot
        protvalues=latent_adata.obs['outDuct_'+prog].to_numpy(copy=True)
        protvalues[np.logical_not(latent_adata.obs[prot+'log']>0)]=np.nan
        latent_adata.obs[v]=protvalues
        if not os.path.exists(os.path.join(subclusterplotdir,prot,'distDuct')):
            os.mkdir(os.path.join(subclusterplotdir,prot,'distDuct'))
        sc.settings.figdir=os.path.join(subclusterplotdir,prot,'distDuct')

        latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v].to_numpy())]
        idxKeep=np.repeat(False,latent_adata_sub.shape[0])
        for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
            if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
                idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
        latent_adata_sub=latent_adata_sub[idxKeep]
        if latent_adata_sub.shape[0]>0:
            print(v)
    #         subsizes=np.zeros(np.unique(kmeans_sub).size)
            nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
    #         for sidx in range(nonzeroSub.size):
    #             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
            latent_adata_sub.uns['kmeans_sub_sizes']=counts
            _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
            latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]


            sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6,cmap='jet')





outDuct_Breast tissueaSMA


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situaSMA


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situcytokeratin


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situ and breast tissueaSMA


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situ and breast tissuecytokeratin


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situ with early infiltratioaSMA


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situ with early infiltratiocytokeratin


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Invasive ductal carcinomaaSMA


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Invasive ductal carcinomacytokeratin


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Invasive ductal carcinoma and breast tissueaSMA


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Invasive ductal carcinoma and breast tissuecytokeratin


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Micropapillary type ductal carcinoma in situ wiaSMA


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Micropapillary type ductal carcinoma in situ wicytokeratin


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


In [224]:
#distance excluding cells in duct - by prog
vmin=0
vmax=np.max(latent_adata.obs['dist2duct_log_outOnly'])
for prog in np.unique(progList[np.isfinite(latent_adata.obs['dist2duct_log'])]):
    for prot in vminlist.keys():
        v='dist2duct_log_outOnly_'+prot+prog
        protvalues=latent_adata.obs['dist2duct_log_outOnly_'+prog].to_numpy(copy=True)
        protvalues[np.logical_not(latent_adata.obs[prot+'log']>0)]=np.nan
        latent_adata.obs[v]=protvalues
        if not os.path.exists(os.path.join(subclusterplotdir,prot,'distDuct')):
            os.mkdir(os.path.join(subclusterplotdir,prot,'distDuct'))
        sc.settings.figdir=os.path.join(subclusterplotdir,prot,'distDuct')

        latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v].to_numpy())]
        idxKeep=np.repeat(False,latent_adata_sub.shape[0])
        for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
            if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
                idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
        latent_adata_sub=latent_adata_sub[idxKeep]
        if latent_adata_sub.shape[0]>0:
            print(v)
    #         subsizes=np.zeros(np.unique(kmeans_sub).size)
            nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
    #         for sidx in range(nonzeroSub.size):
    #             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
            latent_adata_sub.uns['kmeans_sub_sizes']=counts
            _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
            latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]



            sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6,cmap='jet')
        #     sc.tl.umap(latent_adata, init_pos='paga')
            sc.pl.umap(latent_adata_sub,color=[v],legend_loc='on data',save='graph_umap'+v+'.png',show=False,size=3,vmin=vmin,vmax=vmax)






dist2duct_log_outOnly_aSMABreast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_aSMADuctal carcinoma in situ


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_aSMADuctal carcinoma in situ and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_aSMADuctal carcinoma in situ with early infiltratio


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_aSMAInvasive ductal carcinoma


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_cytokeratinInvasive ductal carcinoma


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_aSMAInvasive ductal carcinoma and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_aSMAMicropapillary type ductal carcinoma in situ wi


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


In [227]:
#%cells within distance threshold of ducts normalized by total outside of ducts - by prog
distanceThresh=[radius*2*2,radius*2*5, radius*2*10]

if not os.path.exists(os.path.join(subclusterplotdir,'distThresh')):
    os.mkdir(os.path.join(subclusterplotdir,'distThresh'))
for d in distanceThresh:
    for prog in np.unique(progList):
        for prot in vminlist.keys():
            v='distThresh_'+prot+prog+'_'+str(d)
            protvalues=latent_adata.obs['distThresh_'+prog+'_'+str(d)].to_numpy(copy=True)
            protvalues[np.logical_not(latent_adata.obs[prot+'log']>0)]=np.nan
            latent_adata.obs[v]=protvalues
            if not os.path.exists(os.path.join(subclusterplotdir,prot,'distDuct','distThresh')):
                os.mkdir(os.path.join(subclusterplotdir,prot,'distDuct','distThresh'))
            sc.settings.figdir=os.path.join(subclusterplotdir,prot,'distDuct','distThresh')

            latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v].to_numpy())]
            idxKeep=np.repeat(False,latent_adata_sub.shape[0])
            for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
                if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
                    idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
            latent_adata_sub=latent_adata_sub[idxKeep]
            if latent_adata_sub.shape[0]>0:
                print(v)
        #         subsizes=np.zeros(np.unique(kmeans_sub).size)
                nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
        #         for sidx in range(nonzeroSub.size):
        #             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
                latent_adata_sub.uns['kmeans_sub_sizes']=counts
                _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
                latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]



                sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6,cmap='jet')



distThresh_aSMABreast tissue_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMADuctal carcinoma in situ_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMADuctal carcinoma in situ and breast tissue_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMADuctal carcinoma in situ with early infiltratio_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMAInvasive ductal carcinoma_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_cytokeratinInvasive ductal carcinoma_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMAInvasive ductal carcinoma and breast tissue_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMAMicropapillary type ductal carcinoma in situ wi_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMABreast tissue_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMADuctal carcinoma in situ_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMADuctal carcinoma in situ and breast tissue_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMADuctal carcinoma in situ with early infiltratio_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMAInvasive ductal carcinoma_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_cytokeratinInvasive ductal carcinoma_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMAInvasive ductal carcinoma and breast tissue_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMAMicropapillary type ductal carcinoma in situ wi_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMABreast tissue_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMADuctal carcinoma in situ_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMADuctal carcinoma in situ and breast tissue_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMADuctal carcinoma in situ with early infiltratio_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMAInvasive ductal carcinoma_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_cytokeratinInvasive ductal carcinoma_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMAInvasive ductal carcinoma and breast tissue_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_aSMAMicropapillary type ductal carcinoma in situ wi_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
