In [2]:
import time
import os

import scanpy as sc
import numpy as np
import scipy.sparse as sp

import torch
from torch import optim
from torch.utils.data import DataLoader

import models.loadImg as loadImg
import models.modelsCNN as modelsCNN
import models.optimizer as optimizer

import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import gc
from skimage import io
import umap
from sklearn.cluster import MiniBatchKMeans,AgglomerativeClustering
from sklearn.decomposition import PCA

import anndata as ad

In [3]:
sc.settings.verbosity = 3

radius=48
datadir='/media/xinyi/dcis2idc/data'

savenamesample='_valsamples'
with open(os.path.join(datadir,'processed','latent311'+savenamesample), 'rb') as input:
    latent=pickle.load(input)

name='exp0'
logsavepath='/media/xinyi/dcis2idc/log/cnnvae'+name
modelsavepath='/media/xinyi/dcis2idc/models/cnnvae'+name
plotsavepath='/media/xinyi/dcis2idc/plots/cnnvae'+name

ep=311

with open(os.path.join(datadir,'processed','train_cnnvae_names'+savenamesample), 'rb') as input:
    allImgNames=pickle.load(input)
#plot by disease progression
br1003aSpecs=pd.read_excel('/media/xinyi/dcis2idc/data/BR1003a specs.xlsx',header=10)
br301Specs=pd.read_excel('/media/xinyi/dcis2idc/data/BR301 specs.xlsx',header=10)
br8018aSpecs=pd.read_excel('/media/xinyi/dcis2idc/data/BR8018a specs.xlsx',header=10)
br1003aSpecs.index=br1003aSpecs.loc[:,'Position']
br301Specs.index=br301Specs.loc[:,'Position']
br8018aSpecs.index=br8018aSpecs.loc[:,'Position']

progList=np.copy(allImgNames)
for s in np.unique(allImgNames):
    ssplit=s.split('_')
    if 'br1003a'==ssplit[0]:
        prog_s=br1003aSpecs.loc[(ssplit[-1],'Pathology diagnosis')]
    elif 'br301'==ssplit[0]:
        prog_s=br301Specs.loc[(ssplit[-1],'Pathology diagnosis')]
    elif 'br8018a'==ssplit[0]:
        prog_s=br8018aSpecs.loc[(ssplit[-1],'Pathology diagnosis')]
    progList[allImgNames==s]=prog_s

In [4]:
with open(os.path.join(plotsavepath,'cluster_reordered','minibatchkmean_ncluster'+str(8)+'n_pcs'+str(50)+'epoch'+str(ep)+'_plottingIdx_progBalanced_'+str(0)+'_reordered'+'_subcluster_all','pagaPos_reordered'), 'rb') as output:
    trainPagaPos=pickle.load(output)

In [5]:
neworder=[1, 5, 3, 7, 2, 0, 4, 6]
#use chosen subcluster number and save plots
sc.settings.verbosity = 3
# subcluster=8
subclusterDict={0:[4],1:[6],2:[8],3:[6],4:[6],5:[6],6:[6],7:[4]}
ncluster=8

plotepoch=311
clustersavedir=os.path.join(plotsavepath,'cluster'+savenamesample+'_reordered')
clusterplotdir=os.path.join(clustersavedir,'plots')
n_pcs=50
savenamecluster='minibatchkmean_ncluster'+str(ncluster)+'n_pcs'+str(n_pcs)+'epoch'+str(plotepoch)
with open(os.path.join(clustersavedir,savenamecluster+'_all'), 'rb') as output:
    clusterRes=pickle.load(output)

latent_adata=ad.AnnData(latent)
kmeans_sub=(np.zeros(clusterRes.size)-1).astype(str)
for c in np.unique(clusterRes):
    print('cluster'+str(c))
    
    subclustersavedir=os.path.join(clustersavedir,savenamecluster+'_plottingIdx_progBalanced_'+str(0)+'_subcluster'+str(neworder[c]))
    with open(os.path.join(subclustersavedir,'minibatchkmean_ncluster'+str(subclusterDict[neworder[c]][0])+'n_pcs'+str(n_pcs)+'epoch'+str(plotepoch)+'_all'), 'rb') as output:
        subclusterRes=pickle.load(output)
    print(np.unique(subclusterRes))
    kmeans_sub[clusterRes==c]=np.char.add(np.repeat(str(c)+'-',subclusterRes.size),subclusterRes.astype(str))
        
latent_adata.obs['kmeans_sub']=kmeans_sub
latent_adata.obs['kmeans_sub']=latent_adata.obs['kmeans_sub'].astype('category')
latent_adata.obs['kmeans']=clusterRes.astype(str)
latent_adata.obs['kmeans']=latent_adata.obs['kmeans'].astype('category')


subclusterplotdir=os.path.join(clustersavedir,savenamecluster+'_subcluster_all')
if not os.path.exists(subclusterplotdir):
    os.mkdir(subclusterplotdir)


sc.settings.figdir=os.path.join(subclusterplotdir)

np.random.seed(3)
sc.tl.pca(latent_adata, svd_solver='arpack')
sc.pp.neighbors(latent_adata, n_neighbors=4, n_pcs=20)
sc.tl.paga(latent_adata, groups='kmeans_sub')
plt.rcParams["figure.figsize"] = (10, 10)
plt.rcParams['figure.dpi'] = 1200
# # # sc.pl.paga(latent_adata, color=['kmeans'],save='graph.png') #default threshold is 0.01
# sc.pl.paga(latent_adata, color=['kmeans'],pos=trainPagaPos,save='graph_thresh001.png',threshold=0.01,show=False,random_state=6)
# sc.tl.umap(latent_adata, init_pos='paga')
# sc.pl.umap(latent_adata,color=['kmeans'],legend_loc='on data',save='graph_umap.png',show=False)



  latent_adata=ad.AnnData(latent)


cluster0
[0 1 2 3 4 5]
cluster1
[0 1 2 3 4 5]
cluster2
[0 1 2 3 4 5]
cluster3
[0 1 2 3]
cluster4
[0 1 2 3 4 5 6 7]
cluster5
[0 1 2 3]
cluster6
[0 1 2 3 4 5]
cluster7
[0 1 2 3 4 5]
computing PCA
    with n_comps=50
    finished (0:03:15)
computing neighbors
    using 'X_pca' with n_pcs = 20
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:01:34)
running PAGA
    finished: added
    'paga/connectivities', connectivities adjacency (adata.uns)
    'paga/connectivities_tree', connectivities subtree (adata.uns) (0:02:05)


## color by proteins - background subtracted

In [6]:
with open(os.path.join(datadir,'processed','train_cnnvae_cellLabels'+savenamesample), 'rb') as output:
    cellIDlist=pickle.load(output)

In [26]:
stats2plot=['int_median','int_mode','int_mean']
stains=['aSMA','cytokeratin','collagen1','gh2ax','ki67']
uniquenames,nameIdx=np.unique(allImgNames,return_index=True)

ncluster=8
plotepoch=311
    
allstats=None
alllabels=None
allvarnames=None

minMinDict={'aSMA':[],'cytokeratin':[],'collagen1':[],'gh2ax':[],'ki67':[]}

allidx=None
idx_temp=np.arange(allImgNames.size)
for sidx in range(uniquenames.size):
    s=np.unique(allImgNames)[sidx]
    plottingIdx_i_s=np.arange(allImgNames.size)[allImgNames==s]-nameIdx[sidx]

    print(s)
    assert np.min(plottingIdx_i_s)>=0
#         ssplit=s.split('_')
    slabels=kmeans_sub[allImgNames==s]
    sidx=idx_temp[allImgNames==s]
    
    for stain_s in stains:
        if not stain_s in s:
            continue
        path_s=os.path.join(datadir,'_'.join(s.split('_')[:-1]),'cellular_'+stain_s.lower(),s.split('_')[-1] +'.csv')
        if not os.path.exists(path_s):
            continue
        print(stain_s)
        
        stats_s=pd.read_csv(path_s)
        stats_s.index=stats_s.loc[:,'label']
        
        minMin=np.min(stats_s.loc[:,'int_min'])
        print(minMin)
        
        minMinDict[stain_s].append(minMin)
        if allstats is None:
            allstats=stats_s.loc[cellIDlist[s][plottingIdx_i_s],stats2plot].to_numpy()-minMin
            alllabels=np.copy(slabels)
            allvarnames=np.repeat(stain_s,slabels.size)
            allidx=np.copy(sidx)
        else:
            allstats=np.concatenate((allstats,stats_s.loc[cellIDlist[s][plottingIdx_i_s],stats2plot].to_numpy()-minMin),axis=0)
            alllabels=np.concatenate((alllabels,np.copy(slabels)))
            allvarnames=np.concatenate((allvarnames,np.repeat(stain_s,slabels.size)))
            allidx=np.concatenate((allidx,sidx))
            
            

br1003a_2_cytokeratin_555_ki67_647_hoechst_A1
cytokeratin
2234.0
ki67
840.0
br1003a_2_cytokeratin_555_ki67_647_hoechst_A10
cytokeratin
2777.0
ki67
902.0
br1003a_2_cytokeratin_555_ki67_647_hoechst_A2
cytokeratin
2245.0
ki67
805.0
br1003a_2_cytokeratin_555_ki67_647_hoechst_A3
cytokeratin
2182.0
ki67
915.0
br1003a_2_cytokeratin_555_ki67_647_hoechst_A5
cytokeratin
2373.0
ki67
949.0
br1003a_2_cytokeratin_555_ki67_647_hoechst_A7
cytokeratin
2419.0
ki67
913.0
br1003a_2_cytokeratin_555_ki67_647_hoechst_A8
cytokeratin
2419.0
ki67
923.0
br1003a_2_cytokeratin_555_ki67_647_hoechst_B1
cytokeratin
2032.0
ki67
1444.0
br1003a_2_cytokeratin_555_ki67_647_hoechst_B10
cytokeratin
1312.0
ki67
1019.0
br1003a_2_cytokeratin_555_ki67_647_hoechst_B2
cytokeratin
2098.0
ki67
1510.0
br1003a_2_cytokeratin_555_ki67_647_hoechst_B3
cytokeratin
2194.0
ki67
1523.0
br1003a_2_cytokeratin_555_ki67_647_hoechst_B4
cytokeratin
1513.0
ki67
906.0
br1003a_2_cytokeratin_555_ki67_647_hoechst_B5
cytokeratin
742.0
ki67
621.0
br1003a

br8018a_2_cytokeratin_555_ki67_647_hoechst_B10
cytokeratin
1995.0
ki67
1841.0
br8018a_2_cytokeratin_555_ki67_647_hoechst_B2
cytokeratin
1632.0
ki67
1193.0
br8018a_2_cytokeratin_555_ki67_647_hoechst_B3
cytokeratin
1595.0
ki67
1388.0
br8018a_2_cytokeratin_555_ki67_647_hoechst_B5
cytokeratin
1807.0
ki67
1494.0
br8018a_2_cytokeratin_555_ki67_647_hoechst_B6
cytokeratin
1761.0
ki67
1648.0
br8018a_2_cytokeratin_555_ki67_647_hoechst_B7
cytokeratin
2073.0
ki67
1650.0
br8018a_2_cytokeratin_555_ki67_647_hoechst_B8
cytokeratin
2046.0
ki67
2067.0
br8018a_2_cytokeratin_555_ki67_647_hoechst_B9
cytokeratin
1907.0
ki67
1840.0
br8018a_2_cytokeratin_555_ki67_647_hoechst_C1
cytokeratin
1468.0
ki67
772.0
br8018a_2_cytokeratin_555_ki67_647_hoechst_C10
cytokeratin
1895.0
ki67
1020.0
br8018a_2_cytokeratin_555_ki67_647_hoechst_C2
cytokeratin
1444.0
ki67
773.0
br8018a_2_cytokeratin_555_ki67_647_hoechst_C3
cytokeratin
1407.0
ki67
743.0
br8018a_2_cytokeratin_555_ki67_647_hoechst_C4
cytokeratin
1552.0
ki67
864.0
b

In [13]:
#add single cell plots & use all cells -- use previously calculated graph in combined subcluster (neighbors=4)
#log scale
#thresh=1
# nneighbors_list=[4,10] 

for v in np.unique(allvarnames):
    plotidx=allidx[allvarnames==v]
    protvalues=np.repeat(np.nan,latent_adata.shape[0])
    protvalues[plotidx]=np.log(allstats[allvarnames==v,0]+1)
    latent_adata.obs[v+'log']=protvalues
    sc.settings.figdir=os.path.join(subclusterplotdir)

    sc.pl.paga(latent_adata, color=[v+'log'],pos=trainPagaPos,save='graph'+v+'_allcells_log.png',show=False,random_state=6)
#     sc.tl.umap(latent_adata, init_pos='paga')
    sc.pl.umap(latent_adata,color=[v+'log'],legend_loc='on data',save='graph_umap'+v+'_log.png',show=False,size=3)


In [17]:
#by pathology
#changed paga plotting function to enable vmax and vmin
#add single cell plots & use all cells -- use previously calculated graph in combined subcluster (neighbors=4)
#log scale
#thresh=1
# nneighbors_list=[4,10] 
latent_adata.obs['prog']=progList
vminlist={'aSMA':5.3,'collagen1':6.3,'cytokeratin':6.35,'gh2ax':5.1,'ki67':5.8}
vmaxlist={'aSMA':7.4,'collagen1':8.15,'cytokeratin':8.5,'gh2ax':7.55,'ki67':8.65}
# sc.tl.umap(latent_adata, init_pos='paga')
# for v in np.unique(allvarnames):
for v in ['cytokeratin']:
    vmin=vminlist[v]
    vmax=vmaxlist[v]
    for prog in np.unique(progList[np.isfinite(latent_adata.obs[v+'log'])]):
        vprog=v+'_log_'+prog
        plotidx=allidx[allvarnames==v]
        protvalues=np.repeat(np.nan,latent_adata.shape[0])
        protvalues[plotidx]=np.log(allstats[allvarnames==v,0]+1)
        protvalues[latent_adata.obs['prog']!=prog]=np.nan
        latent_adata.obs[vprog]=protvalues

        sc.settings.figdir=os.path.join(subclusterplotdir,'cytokeratin')

        sc.pl.paga(latent_adata, color=[vprog],pos=trainPagaPos,save='graph'+vprog+'_allcells.png',show=False,random_state=6,vmin=vmin,vmax=vmax,cmap='jet')
#         sc.tl.umap(latent_adata, init_pos='paga')
        sc.pl.umap(latent_adata,color=[vprog],legend_loc='on data',save='graph_umap'+vprog+'.png',show=False,size=3,vmin=vmin,vmax=vmax,cmap='jet')



--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


In [18]:
stats2plot=['int_median','int_mode']
stains=['aSMA','cytokeratin','collagen1','gh2ax','ki67']
uniquenames,nameIdx=np.unique(allImgNames,return_index=True)

ncluster=8
plotepoch=311
    
allstats=None
allstats_duct=None
alllabels=None
allvarnames=None

allidx=None
idx_temp=np.arange(allImgNames.size)
for sidx in range(uniquenames.size):
    s=np.unique(allImgNames)[sidx]
    plottingIdx_i_s=np.arange(allImgNames.size)[allImgNames==s]-nameIdx[sidx]

    path_s_duct=os.path.join(datadir,'_'.join(s.split('_')[:-1]),'duct_nuc_membership',s.split('_')[-1] +'.csv')
    if not os.path.exists(path_s_duct):
        continue
    print(s)
    
    assert np.min(plottingIdx_i_s)>=0
#         ssplit=s.split('_')
    slabels=kmeans_sub[allImgNames==s]
    sidx=idx_temp[allImgNames==s]
    
    stats_s_duct=pd.read_csv(path_s_duct)
    stats_s_duct.index=stats_s_duct.loc[:,'label']
    stats_s_duct=stats_s_duct.loc[cellIDlist[s][plottingIdx_i_s],'int_median'].to_numpy()
    
    for stain_s in stains:
        if not stain_s in s:
            continue
        path_s=os.path.join(datadir,'_'.join(s.split('_')[:-1]),'cellular_'+stain_s.lower(),s.split('_')[-1] +'.csv')
        if not os.path.exists(path_s):
            continue
        print(stain_s)
        
        stats_s=pd.read_csv(path_s)
        stats_s.index=stats_s.loc[:,'label']
        
        minMin=np.min(stats_s.loc[:,'int_min'])
        
        if allstats is None:
            allstats=stats_s.loc[cellIDlist[s][plottingIdx_i_s],stats2plot].to_numpy()-minMin
            alllabels=np.copy(slabels)
            allvarnames=np.repeat(stain_s,slabels.size)
            allidx=np.copy(sidx)
            allstats_duct=np.copy(stats_s_duct)
        else:
            allstats=np.concatenate((allstats,stats_s.loc[cellIDlist[s][plottingIdx_i_s],stats2plot].to_numpy()-minMin),axis=0)
            alllabels=np.concatenate((alllabels,np.copy(slabels)))
            allvarnames=np.concatenate((allvarnames,np.repeat(stain_s,slabels.size)))
            allidx=np.concatenate((allidx,sidx))
            allstats_duct=np.concatenate((allstats_duct,stats_s_duct))
            

br1003a_2_cytokeratin_555_ki67_647_hoechst_B1
cytokeratin
ki67
br1003a_2_cytokeratin_555_ki67_647_hoechst_B2
cytokeratin
ki67
br1003a_2_cytokeratin_555_ki67_647_hoechst_B3
cytokeratin
ki67
br1003a_2_cytokeratin_555_ki67_647_hoechst_B5
cytokeratin
ki67
br1003a_2_cytokeratin_555_ki67_647_hoechst_B7
cytokeratin
ki67
br1003a_2_cytokeratin_555_ki67_647_hoechst_I2
cytokeratin
ki67
br1003a_2_cytokeratin_555_ki67_647_hoechst_I3
cytokeratin
ki67
br1003a_2_cytokeratin_555_ki67_647_hoechst_I7
cytokeratin
ki67
br1003a_2_cytokeratin_555_ki67_647_hoechst_I8
cytokeratin
ki67
br1003a_2_cytokeratin_555_ki67_647_hoechst_I9
cytokeratin
ki67
br301_5_cytokeratin_555_ki67_647_hoechst_A1
cytokeratin
ki67
br301_5_cytokeratin_555_ki67_647_hoechst_A2
cytokeratin
ki67
br301_5_cytokeratin_555_ki67_647_hoechst_A3
cytokeratin
ki67
br301_5_cytokeratin_555_ki67_647_hoechst_A4
cytokeratin
ki67
br301_5_cytokeratin_555_ki67_647_hoechst_A5
cytokeratin
ki67
br301_5_cytokeratin_555_ki67_647_hoechst_A6
cytokeratin
ki67
br30

In [19]:
#by pathology -- separate by in vs out of duct
#changed paga plotting function to enable vmax and vmin
#add single cell plots & use all cells -- use previously calculated graph in combined subcluster (neighbors=4)
#log scale
#thresh=1
# nneighbors_list=[4,10] 
latent_adata.obs['prog']=progList
vminlist={'aSMA':5.3,'collagen1':6.3,'cytokeratin':6.35,'gh2ax':5.1,'ki67':5.8}
vmaxlist={'aSMA':7.4,'collagen1':8.15,'cytokeratin':8.5,'gh2ax':7.55,'ki67':8.65}
for v in np.unique(allvarnames):
    vmin=vminlist[v]
    vmax=vmaxlist[v]
    for prog in np.unique(progList[np.isfinite(latent_adata.obs[v+'log'])]):
        vprog=v+'_log_'+prog+'_induct'
        plotidx=allidx[np.logical_and(allvarnames==v,allstats_duct>0)]
        protvalues=np.repeat(np.nan,latent_adata.shape[0])
        protvalues[plotidx]=np.log(allstats[np.logical_and(allvarnames==v,allstats_duct>0),0]+1)
        protvalues[latent_adata.obs['prog']!=prog]=np.nan
        latent_adata.obs[vprog]=protvalues

        sc.settings.figdir=os.path.join(subclusterplotdir,'cytokeratin')

        sc.pl.paga(latent_adata, color=[vprog],pos=trainPagaPos,save='graph'+vprog+'_allcells.png',show=False,random_state=6,vmin=vmin,vmax=vmax,cmap='jet')
#         sc.tl.umap(latent_adata, init_pos='paga')
        sc.pl.umap(latent_adata,color=[vprog],legend_loc='on data',save='graph_umap'+vprog+'.png',show=False,size=3,vmin=vmin,vmax=vmax,cmap='jet')

        vprog=v+'_log_'+prog+'_outduct'
        plotidx=allidx[np.logical_and(allvarnames==v,allstats_duct==0)]
        protvalues=np.repeat(np.nan,latent_adata.shape[0])
        protvalues[plotidx]=np.log(allstats[np.logical_and(allvarnames==v,allstats_duct==0),0]+1)
        protvalues[latent_adata.obs['prog']!=prog]=np.nan
        latent_adata.obs[vprog]=protvalues

        sc.settings.figdir=os.path.join(subclusterplotdir,'cytokeratin')

        sc.pl.paga(latent_adata, color=[vprog],pos=trainPagaPos,save='graph'+vprog+'_allcells.png',show=False,random_state=6,vmin=vmin,vmax=vmax,cmap='jet')
#         sc.tl.umap(latent_adata, init_pos='paga')
        sc.pl.umap(latent_adata,color=[vprog],legend_loc='on data',save='graph_umap'+vprog+'.png',show=False,size=3,vmin=vmin,vmax=vmax,cmap='jet')




--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


--> added 'pos', the PAGA positions (adata.uns['paga'])


In [21]:
gc.collect()

0

### threshold protein

In [22]:
minCell=5

In [34]:
allstatslog_std1=np.log(allstats[:,2]+1)
for v in np.unique(allvarnames):
    meanProt=np.mean(np.log(allstats[allvarnames==v,2]+1)) #mean
    stdProt=np.std(np.log(allstats[allvarnames==v,2]+1))
    zeroIdx=np.arange(allvarnames.shape[0])[allvarnames==v][allstatslog_std1[allvarnames==v]<(meanProt+stdProt)]
    allstatslog_std1[zeroIdx]=0
    
    plotidx=allidx[allvarnames==v]
    protvalues=np.repeat(np.nan,latent_adata.shape[0])
    protvalues[plotidx]=allstatslog_std1[allvarnames==v]
    latent_adata.obs[v+'log']=protvalues

In [29]:
#add single cell plots & use all cells -- use previously calculated graph in combined subcluster (neighbors=4)
#log scale
#thresh=1
# nneighbors_list=[4,10] 
# sc.tl.umap(latent_adata, init_pos='paga')
for v in np.unique(allvarnames):

    sc.settings.figdir=os.path.join(subclusterplotdir)
    
    latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v+'log'])]
    latent_adata_sub.uns['kmeans_sub_sizes']=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)[1]
    sc.pl.paga(latent_adata_sub, color=[v+'log'],pos=trainPagaPos,node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells_log.png',show=False,random_state=6)
    sc.pl.umap(latent_adata_sub,color=[v+'log'],legend_loc='on data',save='graph_umap'+v+'_log.png',show=False,size=3)

gc.collect() 




  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


4359

In [35]:
#%cells with positive expression

for v in np.unique(allvarnames):
    protvalues=latent_adata.obs[v+'log'].to_numpy(copy=True)
    protvalues[protvalues>0]=1
    latent_adata.obs[v+'binary']=protvalues

    sc.settings.figdir=os.path.join(subclusterplotdir)
    
    latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v+'log'])]
    latent_adata_sub.uns['kmeans_sub_sizes']=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)[1]
    sc.pl.paga(latent_adata_sub, color=[v+'binary'],pos=trainPagaPos,node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells_PosFrac.png',show=False,random_state=6)
gc.collect()

In [40]:
#by pathology; exprs and %positive
#changed paga plotting function to enable vmax and vmin
#add single cell plots & use all cells -- use previously calculated graph in combined subcluster (neighbors=4)
#log scale
#thresh=1
# nneighbors_list=[4,10] 
latent_adata.obs['prog']=progList
vminlist={'aSMA':None,'collagen1':None,'cytokeratin':0,'gh2ax':None,'ki67':0}
vmaxlist={'aSMA':None,'collagen1':None,'cytokeratin':7,'gh2ax':None,'ki67':8.6}
vminlistFrac={'aSMA':None,'collagen1':None,'cytokeratin':0,'gh2ax':None,'ki67':0}
vmaxlistFrac={'aSMA':None,'collagen1':None,'cytokeratin':0.8,'gh2ax':None,'ki67':1}
for v in np.unique(allvarnames):
    vmin=vminlist[v]
    vmax=vmaxlist[v]
    vminFrac=vminlistFrac[v]
    vmaxFrac=vmaxlistFrac[v]
    for prog in np.unique(progList[np.isfinite(latent_adata.obs[v+'log'])]):
        vprog=v+'_log_'+prog
        
        if not os.path.exists(os.path.join(subclusterplotdir,v)):
            os.mkdir(os.path.join(subclusterplotdir,v))
        sc.settings.figdir=os.path.join(subclusterplotdir,v)
        
        latent_adata_sub=latent_adata[np.logical_and(np.isfinite(latent_adata.obs[v+'log'].to_numpy()),progList==prog)]
        idxKeep=np.repeat(False,latent_adata_sub.shape[0])
        for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
            if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
                idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
        latent_adata_sub=latent_adata_sub[idxKeep]
#         subsizes=np.zeros(np.unique(kmeans_sub).size)
        nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
#         for sidx in range(nonzeroSub.size):
#             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
        latent_adata_sub.uns['kmeans_sub_sizes']=counts
        _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
#         print(idxKeep)
        latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]
        
        sc.pl.paga(latent_adata_sub, color=[v+'log'],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+vprog+'_allcells.png',show=False,random_state=6,vmin=vmin,vmax=vmax,cmap='jet')
#         sc.tl.umap(latent_adata, init_pos='paga')
        sc.pl.umap(latent_adata_sub,color=[v+'log'],legend_loc='on data',save='graph_umap'+vprog+'.png',show=False,size=3,vmin=vmin,vmax=vmax,cmap='jet')

        sc.pl.paga(latent_adata_sub, color=[v+'binary'],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+vprog+'_allcells_PosFrac.png',show=False,random_state=6,vmin=vminFrac,vmax=vmaxFrac,cmap='jet')


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


In [38]:
gc.collect()

163824

### add distance to duct - 0 is in duct

In [41]:
import sklearn.metrics 
with open(os.path.join(datadir,'processed','train_cnnvae_coord'+savenamesample), 'rb') as output:
    coordlist=pickle.load(output)
    
#distance to nearest cell in duct
uniquenames,nameIdx=np.unique(allImgNames,return_index=True)

allstats=None
alllabels=None
allprog=None

allidx=None
idx_temp=np.arange(allImgNames.size)
for sidx in range(uniquenames.size):
    s=np.unique(allImgNames)[sidx]
    
    path_s=os.path.join(datadir,'_'.join(s.split('_')[:-1]),'duct_nuc_membership',s.split('_')[-1] +'.csv')
    if not os.path.exists(path_s):
        continue
    print(s)
    
    plottingIdx_i_s=np.arange(allImgNames.size)[allImgNames==s]-nameIdx[sidx]
    assert np.min(plottingIdx_i_s)>=0
#         ssplit=s.split('_')
    slabels=kmeans_sub[allImgNames==s]
    
    
    sidx_plot=idx_temp[allImgNames==s]

    if plottingIdx_i_s.size==0:
        continue
    assert np.min(plottingIdx_i_s)>=0

    stats_s_all=pd.read_csv(path_s)
    stats_s_all.index=stats_s_all.loc[:,'label']
    stats_s=stats_s_all.loc[cellIDlist[s][plottingIdx_i_s],'int_median'].to_numpy()

    stats_s_all=stats_s_all.loc[cellIDlist[s],'int_median'].to_numpy()
    scoord_all_duct=coordlist[allImgNames==s][stats_s_all>0]

#         ssplit=s.split('_')
    sprog=progList[allImgNames==s]


    scoord=coordlist[allImgNames==s][stats_s==0]

    _,distS=sklearn.metrics.pairwise_distances_argmin_min(scoord,scoord_all_duct)
    distAll=np.zeros(stats_s.shape[0])
    distAll[stats_s==0]=distS
#     distances=np.concatenate((distances,distS))
#     sublabels=np.concatenate((sublabels,slabels[stats_s==0]))
#     subProgs=np.concatenate((subProgs,sprog[stats_s==0]))

    if allstats is None:
        allstats=np.copy(distAll)
        alllabels=np.copy(slabels)
        allprog=np.copy(sprog)
        allidx=np.copy(sidx_plot)
    else:
        allstats=np.concatenate((allstats,distAll))
        alllabels=np.concatenate((alllabels,np.copy(slabels)))
        allprog=np.concatenate((allprog,sprog))
        allidx=np.concatenate((allidx,sidx_plot))
            
            

br1003a_2_cytokeratin_555_ki67_647_hoechst_B1
br1003a_2_cytokeratin_555_ki67_647_hoechst_B2
br1003a_2_cytokeratin_555_ki67_647_hoechst_B3
br1003a_2_cytokeratin_555_ki67_647_hoechst_B5
br1003a_2_cytokeratin_555_ki67_647_hoechst_B7
br1003a_2_cytokeratin_555_ki67_647_hoechst_I2
br1003a_2_cytokeratin_555_ki67_647_hoechst_I3
br1003a_2_cytokeratin_555_ki67_647_hoechst_I7
br1003a_2_cytokeratin_555_ki67_647_hoechst_I8
br1003a_2_cytokeratin_555_ki67_647_hoechst_I9
br301_5_cytokeratin_555_ki67_647_hoechst_A1
br301_5_cytokeratin_555_ki67_647_hoechst_A2
br301_5_cytokeratin_555_ki67_647_hoechst_A3
br301_5_cytokeratin_555_ki67_647_hoechst_A4
br301_5_cytokeratin_555_ki67_647_hoechst_A5
br301_5_cytokeratin_555_ki67_647_hoechst_A6
br301_5_cytokeratin_555_ki67_647_hoechst_B1
br301_5_cytokeratin_555_ki67_647_hoechst_B2
br301_5_cytokeratin_555_ki67_647_hoechst_B3
br301_5_cytokeratin_555_ki67_647_hoechst_B4
br301_5_cytokeratin_555_ki67_647_hoechst_B5
br301_5_cytokeratin_555_ki67_647_hoechst_B6
br301_5_cyto

In [42]:
v='dist2duct_log'
protvalues=np.repeat(np.nan,latent_adata.shape[0])
protvalues[allidx]=np.log(allstats+1)
latent_adata.obs[v]=protvalues

v='outDuct'
protvalues=np.repeat(np.nan,latent_adata.shape[0])
allstats_binary=np.zeros(allstats.size)
allstats_binary[allstats>0]=1
protvalues[allidx]=allstats_binary
latent_adata.obs[v]=protvalues

v='dist2duct_log_outOnly'
protvalues=np.repeat(np.nan,latent_adata.shape[0])
protvalues[allidx[allstats>0]]=np.log(allstats[allstats>0]+1)
latent_adata.obs[v]=protvalues

In [47]:
latent_adata_sub.shape

(0, 6000)

In [48]:
#protein by pathology -- separate by in vs out of duct
#changed paga plotting function to enable vmax and vmin
#add single cell plots & use all cells -- use previously calculated graph in combined subcluster (neighbors=4)
#log scale
#thresh=1
# nneighbors_list=[4,10] 
# latent_adata.obs['prog']=progList
vminlist={'aSMA':None,'collagen1':None,'cytokeratin':0,'gh2ax':None,'ki67':0}
vmaxlist={'aSMA':None,'collagen1':None,'cytokeratin':7,'gh2ax':None,'ki67':8.6}
vminlistFrac={'aSMA':None,'collagen1':None,'cytokeratin':0,'gh2ax':None,'ki67':0}
vmaxlistFrac={'aSMA':None,'collagen1':None,'cytokeratin':0.8,'gh2ax':None,'ki67':1}
for v in np.unique(allvarnames):
    vmin=vminlist[v]
    vmax=vmaxlist[v]
    vminFrac=vminlistFrac[v]
    vmaxFrac=vmaxlistFrac[v]
    for prog in np.unique(progList[np.isfinite(latent_adata.obs[v+'log'])]):
        vprog=v+'_log_'+prog+'_induct'
        vprog_bin=v+'_binary_'+prog+'_induct'
        protvalues=latent_adata.obs[v+'log'].to_numpy(copy=True)
        protvalues[latent_adata.obs['outDuct']!=0]=np.nan
        protvalues[latent_adata.obs['prog']!=prog]=np.nan
        latent_adata.obs[vprog]=np.copy(protvalues)
        protvalues[protvalues>0]=1
        latent_adata.obs[vprog_bin]=np.copy(protvalues)

        if not os.path.exists(os.path.join(subclusterplotdir,v,'duct')):
            os.mkdir(os.path.join(subclusterplotdir,v,'duct'))
        sc.settings.figdir=os.path.join(subclusterplotdir,v,'duct')

        latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[vprog].to_numpy())]
        idxKeep=np.repeat(False,latent_adata_sub.shape[0])
        for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
            if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
                idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
        latent_adata_sub=latent_adata_sub[idxKeep]
        if latent_adata_sub.shape[0]>0:
            print(vprog)
    #         subsizes=np.zeros(np.unique(kmeans_sub).size)
            nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
    #         for sidx in range(nonzeroSub.size):
    #             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
            latent_adata_sub.uns['kmeans_sub_sizes']=counts
            _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
            latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]

            sc.pl.paga(latent_adata_sub, color=[vprog],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+vprog+'_allcells.png',show=False,random_state=6,vmin=vmin,vmax=vmax,cmap='jet')
            sc.pl.umap(latent_adata_sub,color=[vprog],legend_loc='on data',save='graph_umap'+vprog+'.png',show=False,size=3,vmin=vmin,vmax=vmax,cmap='jet')
            sc.pl.paga(latent_adata_sub, color=[vprog_bin],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+vprog+'_allcells_PosFrac.png',show=False,random_state=6,vmin=vminFrac,vmax=vmaxFrac,cmap='jet')


        
        vprog=v+'_log_'+prog+'_outduct'
        vprog_bin=v+'_binary_'+prog+'_outduct'
        protvalues=latent_adata.obs[v+'log'].to_numpy(copy=True)
        protvalues[latent_adata.obs['outDuct']!=1]=np.nan
        protvalues[latent_adata.obs['prog']!=prog]=np.nan
        latent_adata.obs[vprog]=np.copy(protvalues)
        protvalues[protvalues>0]=1
        latent_adata.obs[vprog_bin]=np.copy(protvalues)
        
        latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[vprog].to_numpy())]
        idxKeep=np.repeat(False,latent_adata_sub.shape[0])
        for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
            if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
                idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
        latent_adata_sub=latent_adata_sub[idxKeep]
        if latent_adata_sub.shape[0]>0:
    #         subsizes=np.zeros(np.unique(kmeans_sub).size)
            nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
    #         for sidx in range(nonzeroSub.size):
    #             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
            latent_adata_sub.uns['kmeans_sub_sizes']=counts
            _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
            latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]

            sc.settings.figdir=os.path.join(subclusterplotdir,v,'duct')

            sc.pl.paga(latent_adata_sub, color=[vprog],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+vprog+'_allcells.png',show=False,random_state=6,vmin=vmin,vmax=vmax,cmap='jet')
            sc.pl.umap(latent_adata_sub,color=[vprog],legend_loc='on data',save='graph_umap'+vprog+'.png',show=False,size=3,vmin=vmin,vmax=vmax,cmap='jet')
            sc.pl.paga(latent_adata_sub, color=[vprog_bin],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+vprog+'_allcells_PosFrac.png',show=False,random_state=6,vmin=vminFrac,vmax=vmaxFrac,cmap='jet')





cytokeratin_log_Breast tissue_induct


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
cytokeratin_log_Ductal carcinoma in situ_induct


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
cytokeratin_log_Ductal carcinoma in situ and breast tissue_induct


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
cytokeratin_log_Ductal carcinoma in situ with early infiltrati_induct


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
cytokeratin_log_Hyperplasia_induct


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
cytokeratin_log_Invasive ductal carcinoma_induct


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
cytokeratin_log_Invasive ductal carcinoma and breast tissue_induct


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
cytokeratin_log_Micropapillary type ductal carcinoma in situ w_induct


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
ki67_log_Breast tissue_induct


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
ki67_log_Ductal carcinoma in situ_induct


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
ki67_log_Ductal carcinoma in situ and breast tissue_induct


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
ki67_log_Ductal carcinoma in situ with early infiltrati_induct


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
ki67_log_Hyperplasia_induct


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
ki67_log_Invasive ductal carcinoma_induct


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
ki67_log_Invasive ductal carcinoma and breast tissue_induct


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])
ki67_log_Micropapillary type ductal carcinoma in situ w_induct


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
--> added 'pos', the PAGA positions (adata.uns['paga'])


In [52]:
#add single cell plots & use all cells -- use previously calculated graph in combined subcluster (neighbors=4)
v='dist2duct_log'
sc.settings.figdir=os.path.join(subclusterplotdir)
sc.pl.paga(latent_adata, color=[v],pos=trainPagaPos,node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6)
# sc.tl.umap(latent_adata, init_pos='paga')
sc.pl.umap(latent_adata,color=[v],legend_loc='on data',save='graph_umap'+v+'.png',show=False,size=3)



--> added 'pos', the PAGA positions (adata.uns['paga'])


<AxesSubplot:title={'center':'dist2duct_log'}, xlabel='UMAP1', ylabel='UMAP2'>

In [65]:
#by prog
latent_adata.obs['prog']=progList
vmin=0
vmax=np.max(latent_adata.obs['dist2duct_log'])
for prog in np.unique(progList[np.isfinite(latent_adata.obs['dist2duct_log'])]):
    v='dist2duct_log_'+prog
    protvalues=np.repeat(np.nan,latent_adata.shape[0])
    protvalues[allidx]=np.log(allstats+1)
    protvalues[latent_adata.obs['prog']!=prog]=np.nan
    latent_adata.obs[v]=protvalues
    sc.settings.figdir=os.path.join(subclusterplotdir)
    
    latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v].to_numpy())]
    idxKeep=np.repeat(False,latent_adata_sub.shape[0])
    for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
        if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
            idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
    latent_adata_sub=latent_adata_sub[idxKeep]
    if latent_adata_sub.shape[0]>0:
        print(v)
#         subsizes=np.zeros(np.unique(kmeans_sub).size)
        nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
#         for sidx in range(nonzeroSub.size):
#             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
        latent_adata_sub.uns['kmeans_sub_sizes']=counts
        _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
        latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]



        sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6,cmap='jet')
    #     sc.tl.umap(latent_adata, init_pos='paga')
        sc.pl.umap(latent_adata_sub,color=[v],legend_loc='on data',save='graph_umap'+v+'.png',show=False,size=3,vmin=vmin,vmax=vmax)



dist2duct_log_Breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Ductal carcinoma in situ with early infiltrati


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Hyperplasia


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Invasive ductal carcinoma


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Invasive ductal carcinoma and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_Micropapillary type ductal carcinoma in situ w


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


In [56]:
#%cells out of duct
#add single cell plots & use all cells -- use previously calculated graph in combined subcluster (neighbors=4)
v='outDuct'
sc.settings.figdir=os.path.join(subclusterplotdir)

sc.pl.paga(latent_adata,pos=trainPagaPos,node_size_power=1,fontoutline=0.5, color=[v],save='graph'+v+'_allcells.png',show=False,random_state=6)


--> added 'pos', the PAGA positions (adata.uns['paga'])


<Axes:>

In [64]:
# %cells out of duct - by prog
for prog in np.unique(progList[np.isfinite(latent_adata.obs['dist2duct_log'])]):
    v='outDuct_'+prog
    protvalues=np.repeat(np.nan,latent_adata.shape[0])
    protvalues[allidx]=allstats_binary
    protvalues[latent_adata.obs['prog']!=prog]=np.nan
    latent_adata.obs[v]=protvalues
    sc.settings.figdir=os.path.join(subclusterplotdir)

    latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v].to_numpy())]
    idxKeep=np.repeat(False,latent_adata_sub.shape[0])
    for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
        if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
            idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
    latent_adata_sub=latent_adata_sub[idxKeep]
    if latent_adata_sub.shape[0]>0:
        print(v)
#         subsizes=np.zeros(np.unique(kmeans_sub).size)
        nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
#         for sidx in range(nonzeroSub.size):
#             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
        latent_adata_sub.uns['kmeans_sub_sizes']=counts
        _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
        latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]


        sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6,cmap='jet')



outDuct_Breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situ


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situ and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situ with early infiltrati


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Hyperplasia


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Invasive ductal carcinoma


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Invasive ductal carcinoma and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Micropapillary type ductal carcinoma in situ w


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


In [59]:
#distance excluding cells in duct

v='dist2duct_log_outOnly'
sc.settings.figdir=os.path.join(subclusterplotdir)

sc.pl.paga(latent_adata, color=[v],pos=trainPagaPos,node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6)
# sc.tl.umap(latent_adata, init_pos='paga')
sc.pl.umap(latent_adata,color=[v],legend_loc='on data',save='graph_umap'+v+'.png',show=False,size=3)



--> added 'pos', the PAGA positions (adata.uns['paga'])


<AxesSubplot:title={'center':'dist2duct_log_outOnly'}, xlabel='UMAP1', ylabel='UMAP2'>

In [63]:
#distance excluding cells in duct - by prog
vmin=0
vmax=np.max(latent_adata.obs['dist2duct_log_outOnly'])
for prog in np.unique(progList[np.isfinite(latent_adata.obs['dist2duct_log'])]):
    v='dist2duct_log_outOnly_'+prog
    protvalues=np.repeat(np.nan,latent_adata.shape[0])
    protvalues[allidx[allstats>0]]=np.log(allstats[allstats>0]+1)
    protvalues[latent_adata.obs['prog']!=prog]=np.nan
    latent_adata.obs[v]=protvalues
    sc.settings.figdir=os.path.join(subclusterplotdir)

    latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v].to_numpy())]
    idxKeep=np.repeat(False,latent_adata_sub.shape[0])
    for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
        if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
            idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
    latent_adata_sub=latent_adata_sub[idxKeep]
    if latent_adata_sub.shape[0]>0:
        print(v)
#         subsizes=np.zeros(np.unique(kmeans_sub).size)
        nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
#         for sidx in range(nonzeroSub.size):
#             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
        latent_adata_sub.uns['kmeans_sub_sizes']=counts
        _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
        latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]



        sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6,cmap='jet')
    #     sc.tl.umap(latent_adata, init_pos='paga')
        sc.pl.umap(latent_adata_sub,color=[v],legend_loc='on data',save='graph_umap'+v+'.png',show=False,size=3,vmin=vmin,vmax=vmax)




dist2duct_log_outOnly_Breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_Ductal carcinoma in situ


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_Ductal carcinoma in situ and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_Ductal carcinoma in situ with early infiltrati


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_Hyperplasia


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_Invasive ductal carcinoma


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_Invasive ductal carcinoma and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_Micropapillary type ductal carcinoma in situ w


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


In [62]:
#%cells within distance threshold of ducts normalized by total outside of ducts - by prog
latent_adata.obs['prog']=progList
distanceThresh=[radius*2*2,radius*2*5, radius*2*10]

if not os.path.exists(os.path.join(subclusterplotdir,'distThresh')):
    os.mkdir(os.path.join(subclusterplotdir,'distThresh'))
for d in distanceThresh:
    for prog in np.unique(progList):
        v='distThresh_'+prog+'_'+str(d)
        protvalues=np.repeat(np.nan,latent_adata.shape[0])
        protvalues[allidx[allstats>0]]=1
        protvalues[allidx[allstats>d]]=0
        protvalues[latent_adata.obs['prog']!=prog]=np.nan
        latent_adata.obs[v]=protvalues
        sc.settings.figdir=os.path.join(subclusterplotdir,'distThresh')
        
        latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v].to_numpy())]
        idxKeep=np.repeat(False,latent_adata_sub.shape[0])
        for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
            if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
                idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
        latent_adata_sub=latent_adata_sub[idxKeep]
        if latent_adata_sub.shape[0]>0:
            print(v)
    #         subsizes=np.zeros(np.unique(kmeans_sub).size)
            nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
    #         for sidx in range(nonzeroSub.size):
    #             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
            latent_adata_sub.uns['kmeans_sub_sizes']=counts
            _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
            latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]



            sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6,cmap='jet')




distThresh_Breast tissue_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ and breast tissue_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ with early infiltrati_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Hyperplasia_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Invasive ductal carcinoma_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Invasive ductal carcinoma and breast tissue_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Micropapillary type ductal carcinoma in situ w_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Breast tissue_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ and breast tissue_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ with early infiltrati_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Hyperplasia_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Invasive ductal carcinoma_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Invasive ductal carcinoma and breast tissue_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Micropapillary type ductal carcinoma in situ w_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Breast tissue_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ and breast tissue_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Ductal carcinoma in situ with early infiltrati_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Hyperplasia_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Invasive ductal carcinoma_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Invasive ductal carcinoma and breast tissue_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_Micropapillary type ductal carcinoma in situ w_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


### plot distance of protein positive cells

In [71]:
# %cells out of duct - by prog
for prog in np.unique(progList[np.isfinite(latent_adata.obs['dist2duct_log'])]):
    for prot in np.unique(allvarnames):
        v='outDuct_'+prog+prot
        protvalues=latent_adata.obs['outDuct_'+prog].to_numpy(copy=True)
        protvalues[np.logical_not(latent_adata.obs[prot+'log']>0)]=np.nan
        latent_adata.obs[v]=protvalues
        if not os.path.exists(os.path.join(subclusterplotdir,prot,'distDuct')):
            os.mkdir(os.path.join(subclusterplotdir,prot,'distDuct'))
        sc.settings.figdir=os.path.join(subclusterplotdir,prot,'distDuct')

        latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v].to_numpy())]
        idxKeep=np.repeat(False,latent_adata_sub.shape[0])
        for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
            if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
                idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
        latent_adata_sub=latent_adata_sub[idxKeep]
        if latent_adata_sub.shape[0]>0:
            print(v)
    #         subsizes=np.zeros(np.unique(kmeans_sub).size)
            nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
    #         for sidx in range(nonzeroSub.size):
    #             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
            latent_adata_sub.uns['kmeans_sub_sizes']=counts
            _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
            latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]


            sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6,cmap='jet')






outDuct_Breast tissuecytokeratin


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Breast tissueki67


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situcytokeratin


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situki67


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situ and breast tissuecytokeratin


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situ and breast tissueki67


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situ with early infiltraticytokeratin


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Ductal carcinoma in situ with early infiltratiki67


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Hyperplasiacytokeratin


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Hyperplasiaki67


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Invasive ductal carcinomacytokeratin


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Invasive ductal carcinomaki67


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Invasive ductal carcinoma and breast tissuecytokeratin


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Invasive ductal carcinoma and breast tissueki67


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Micropapillary type ductal carcinoma in situ wcytokeratin


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
outDuct_Micropapillary type ductal carcinoma in situ wki67


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


In [82]:
#distance excluding cells in duct - by prog
vmin=0
vmax=np.max(latent_adata.obs['dist2duct_log_outOnly'])
for prog in np.unique(progList[np.isfinite(latent_adata.obs['dist2duct_log'])]):
    for prot in np.unique(allvarnames):
        v='dist2duct_log_outOnly_'+prot+prog
        protvalues=latent_adata.obs['dist2duct_log_outOnly_'+prog].to_numpy(copy=True)
        protvalues[np.logical_not(latent_adata.obs[prot+'log']>0)]=np.nan
        latent_adata.obs[v]=protvalues
        if not os.path.exists(os.path.join(subclusterplotdir,prot,'distDuct')):
            os.mkdir(os.path.join(subclusterplotdir,prot,'distDuct'))
        sc.settings.figdir=os.path.join(subclusterplotdir,prot,'distDuct')

        latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v].to_numpy())]
        idxKeep=np.repeat(False,latent_adata_sub.shape[0])
        for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
            if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
                idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
        latent_adata_sub=latent_adata_sub[idxKeep]
        if latent_adata_sub.shape[0]>0 and np.unique(latent_adata_sub.obs['kmeans_sub']).size>1:
            print(v)
    #         subsizes=np.zeros(np.unique(kmeans_sub).size)
            nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
    #         for sidx in range(nonzeroSub.size):
    #             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
            latent_adata_sub.uns['kmeans_sub_sizes']=counts
            _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
            latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]



            sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6,cmap='jet')
        #     sc.tl.umap(latent_adata, init_pos='paga')
            sc.pl.umap(latent_adata_sub,color=[v],legend_loc='on data',save='graph_umap'+v+'.png',show=False,size=3,vmin=vmin,vmax=vmax)



dist2duct_log_outOnly_ki67Breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_ki67Ductal carcinoma in situ


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_cytokeratinDuctal carcinoma in situ and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_ki67Ductal carcinoma in situ and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_cytokeratinDuctal carcinoma in situ with early infiltrati


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_ki67Ductal carcinoma in situ with early infiltrati


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_ki67Hyperplasia


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_cytokeratinInvasive ductal carcinoma


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_ki67Invasive ductal carcinoma


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_ki67Invasive ductal carcinoma and breast tissue


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_cytokeratinMicropapillary type ductal carcinoma in situ w


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
dist2duct_log_outOnly_ki67Micropapillary type ductal carcinoma in situ w


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])


In [81]:
#%cells within distance threshold of ducts normalized by total outside of ducts - by prog
distanceThresh=[radius*2*2,radius*2*5, radius*2*10]

if not os.path.exists(os.path.join(subclusterplotdir,'distThresh')):
    os.mkdir(os.path.join(subclusterplotdir,'distThresh'))
for d in distanceThresh:
    for prog in np.unique(progList):
        for prot in np.unique(allvarnames):
            v='distThresh_'+prot+prog+'_'+str(d)
            protvalues=latent_adata.obs['distThresh_'+prog+'_'+str(d)].to_numpy(copy=True)
            protvalues[np.logical_not(latent_adata.obs[prot+'log']>0)]=np.nan
            latent_adata.obs[v]=protvalues
            if not os.path.exists(os.path.join(subclusterplotdir,prot,'distDuct','distThresh')):
                os.mkdir(os.path.join(subclusterplotdir,prot,'distDuct','distThresh'))
            sc.settings.figdir=os.path.join(subclusterplotdir,prot,'distDuct','distThresh')

            latent_adata_sub=latent_adata[np.isfinite(latent_adata.obs[v].to_numpy())]
            idxKeep=np.repeat(False,latent_adata_sub.shape[0])
            for c in np.unique(latent_adata_sub.obs['kmeans_sub']):
                if np.sum(latent_adata_sub.obs['kmeans_sub']==c)>minCell:
                    idxKeep=np.logical_or(idxKeep,latent_adata_sub.obs['kmeans_sub']==c)
            latent_adata_sub=latent_adata_sub[idxKeep]
            if latent_adata_sub.shape[0]>0 and np.unique(latent_adata_sub.obs['kmeans_sub']).size>1:
                print(v)
        #         subsizes=np.zeros(np.unique(kmeans_sub).size)
                nonzeroSub,counts=np.unique(latent_adata_sub.obs['kmeans_sub'],return_counts=True)
        #         for sidx in range(nonzeroSub.size):
        #             subsizes[np.unique(kmeans_sub)==nonzeroSub[sidx]]=counts[sidx]
                latent_adata_sub.uns['kmeans_sub_sizes']=counts
                _,idxKeep,_=np.intersect1d(np.unique(kmeans_sub),nonzeroSub,return_indices=True)
                latent_adata_sub.uns['paga']['connectivities']=latent_adata.uns['paga']['connectivities'][idxKeep][:,idxKeep]



                sc.pl.paga(latent_adata_sub, color=[v],pos=trainPagaPos[idxKeep],node_size_power=1,fontoutline=0.5,save='graph'+v+'_allcells.png',show=False,random_state=6,cmap='jet')




distThresh_ki67Breast tissue_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Ductal carcinoma in situ_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_cytokeratinDuctal carcinoma in situ and breast tissue_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Ductal carcinoma in situ and breast tissue_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_cytokeratinDuctal carcinoma in situ with early infiltrati_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Ductal carcinoma in situ with early infiltrati_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Hyperplasia_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_cytokeratinInvasive ductal carcinoma_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Invasive ductal carcinoma_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Invasive ductal carcinoma and breast tissue_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_cytokeratinMicropapillary type ductal carcinoma in situ w_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Micropapillary type ductal carcinoma in situ w_192


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Breast tissue_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Ductal carcinoma in situ_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_cytokeratinDuctal carcinoma in situ and breast tissue_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Ductal carcinoma in situ and breast tissue_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_cytokeratinDuctal carcinoma in situ with early infiltrati_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Ductal carcinoma in situ with early infiltrati_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Hyperplasia_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_cytokeratinInvasive ductal carcinoma_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Invasive ductal carcinoma_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Invasive ductal carcinoma and breast tissue_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_cytokeratinMicropapillary type ductal carcinoma in situ w_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Micropapillary type ductal carcinoma in situ w_480


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Breast tissue_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Ductal carcinoma in situ_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_cytokeratinDuctal carcinoma in situ and breast tissue_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Ductal carcinoma in situ and breast tissue_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_cytokeratinDuctal carcinoma in situ with early infiltrati_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Ductal carcinoma in situ with early infiltrati_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Hyperplasia_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_cytokeratinInvasive ductal carcinoma_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Invasive ductal carcinoma_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Invasive ductal carcinoma and breast tissue_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_cytokeratinMicropapillary type ductal carcinoma in situ w_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
distThresh_ki67Micropapillary type ductal carcinoma in situ w_960


  self.data[key] = value


--> added 'pos', the PAGA positions (adata.uns['paga'])
