In [None]:
%matplotlib inline
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
import numpy as np
import scanpy as sp
import anndata as an
import seaborn as sns
sns.set_style("white")
sns.set_color_codes("dark")

In [None]:
# Tissues processed by every per-tissue loop below; each name is also the
# suffix used in that tissue's .h5ad file names.
tissues = [
    'bladder',
    'brain',
    'diaphragm',
    'fat BAT',
    'fat GAT',
    'fat MAT',
    'fat SCAT',
    'heart',
    'kidney',
    'large intestine',
    'limb muscle',
    'liver',
    'lung',
    'marrow',
    'pancreas',
    'skin',
    'spleen',
    'thymus',
    'tongue',
    'trachea',
]

### Abbreviation conventions used consistently throughout the figures

In [None]:
def rename(name):
    """Return the abbreviated figure label for a cell-type name.

    Rules are tried in this order:
      1. exact-name abbreviations (lookup table);
      2. names starting with 't ', 'b ' or 'nk ' get their first two
         characters upper-cased (e.g. 't cell' -> 'T cell');
      3. fixed labels for names starting with 'plasma b ', 'skeletal',
         'smooth' or 'professional';
      4. names whose second space-separated word is 'stem' become the
         upper-cased first letter followed by 'SC';
      5. anything else is returned unchanged.
    """
    exact_abbreviations = {
        'oligodendrocyte': 'oligo.',
        'endothelial cell': 'endo. cell',
        'epidermal cell': 'epid. cell',
        'microglial cell': 'microglia',
        'myeloid leukocyte': 'm. leu.',
        'bergmann glial cell': 'BGC',
        'epithelial cell': 'epith. cell',
        'neuroepithelial cell': 'n. epith. cell',
        'ependymal cell': 'ependy. cell',
        'oligo pre cell': 'OPC',
        'macrophage': 'macro.',
        'enteroendocrine cell': 'enteroendo. cell',
    }
    if name in exact_abbreviations:
        return exact_abbreviations[name]

    # Lymphocyte names keep their wording; only the leading letters are capitalised.
    if name.startswith(('t ', 'b ', 'nk ')):
        return name[:2].upper() + name[2:]

    # Prefix rules, checked in the listed order.
    prefix_abbreviations = (
        ('plasma b ', 'plasma B cell'),
        ('skeletal', 'sat. cell'),
        ('smooth', 'SMC'),
        ('professional', 'APC'),
    )
    for prefix, short in prefix_abbreviations:
        if name.startswith(prefix):
            return short

    # '<something> stem ...' -> '<S>SC'
    words = name.split(' ')
    if len(words) > 1 and words[1] == 'stem':
        return words[0][0].upper() + 'SC'
    return name

In [None]:
# Directory holding the per-tissue .h5ad files (read here, written again later).
folder = '../pb_results/'
tissue_data_red = {}
tissue_data_cpm = {}
tissue_data_raw = {}
# File-name prefix -> dict that receives the loaded object, in load order.
load_targets = (
    ('tissue_data_raw_', tissue_data_raw),
    ('tissue_data_cpm_', tissue_data_cpm),
    ('tissue_data_red_', tissue_data_red),
)
for tissue in tissues:
    print(tissue, end = ' ')
    for file_prefix, target in load_targets:
        target[tissue] = an.read_h5ad(folder + file_prefix + tissue + '.h5ad')

### KNN annotation is based on the neighbors calculated by BBKNN

In [None]:
# For every cell, store in obs['cell_type_knn'] the most frequent 'cell_type'
# among its graph neighbors whose 'data' value is not 'pb'.
for tissue in tissues:
    print(tissue, end = ' ')
    d = tissue_data_red[tissue]
    n = d.uns['neighbors']['connectivities']
    # Fetch both columns once per tissue rather than once per cell.
    source = d.obs['data']
    cell_types = d.obs['cell_type']
    annot = []
    for ii in range(n.shape[0]):
        # Column positions of the non-zero entries of row ii, i.e. the neighbors of cell ii.
        neighbor_pos = n.getrow(ii).nonzero()[1]
        # neighbor_pos holds row *positions*, not obs index labels, so select
        # with .iloc instead of relying on Series[...] falling back to
        # positional lookup (deprecated in pandas).
        neighbor_source = source.iloc[neighbor_pos]
        neighbor_types = cell_types.iloc[neighbor_pos]
        reference_types = neighbor_types[neighbor_source != 'pb']
        # NOTE(review): if a cell has no non-'pb' neighbor, index[0] is either a
        # zero-count category (categorical column) or raises IndexError (object
        # column) -- confirm that this cannot occur for these graphs.
        annot.append(reference_types.value_counts().index[0])
    tissue_data_red[tissue].obs['cell_type_knn'] = annot

In [None]:
def label(x):
    """Choose the annotation for one row.

    x is indexed by the keys 'cell_type' and 'cell_type_knn'.  When
    'cell_type' equals 'pb' the 'cell_type_knn' value is returned; otherwise
    the existing 'cell_type' is returned unchanged.
    """
    return x['cell_type_knn'] if x['cell_type'] == 'pb' else x['cell_type']

# Overwrite 'cell_type' row by row with the value chosen by label().
for tissue in tissues:
    obs_table = tissue_data_red[tissue].obs
    obs_table['cell_type'] = obs_table.apply(label, axis = 1)
    tissue_data_red[tissue].obs = obs_table

In [None]:
def annotate_man(tissue,clusters,cell_type):
    """Manually assign `cell_type` to every cell of the given Leiden clusters.

    tissue    -- key into the module-level tissue_data_red dict.
    clusters  -- container of cluster ids; a row matches when
                 int(row['leiden']) is in it.
    cell_type -- label written to 'cell_type' for matching rows; all other
                 rows keep their current value.

    The object's obs table is modified in place and the same object is returned.
    """
    adata = tissue_data_red[tissue]
    obs_table = adata.obs

    def _relabel(row):
        # Cluster ids are compared as integers.
        if int(row['leiden']) in clusters:
            return cell_type
        return row['cell_type']

    obs_table['cell_type'] = obs_table.apply(_relabel, axis = 1)
    adata.obs = obs_table
    return adata
    
def remove(tissue,cluster):
    """Return the tissue's object restricted to cells outside one Leiden cluster.

    tissue  -- key into the module-level tissue_data_red dict.
    cluster -- cluster id to drop; may be given as an int (the convention
               annotate_man uses) or as a string.

    The entry stored in tissue_data_red is not reassigned; the result of
    indexing it with the keep-mask is returned.
    """
    d = tissue_data_red[tissue]
    # Compare string forms on both sides.  annotate_man has to call int() on the
    # 'leiden' values, so they are presumably stored as strings; a raw `!= 4`
    # would then be True for every row and silently remove nothing.
    keep = (d.obs['leiden'].astype(str) != str(cluster))
    return d[keep,:]

### Next, each tissue-specific object is visualized to verify the annotations

In [None]:
def color(X,A,att,siz=10,cmap={},th=0,alpha=1,annotate=False,legend=True,ncol=1):
    """Scatter-plot 2-D coordinates with pyplot, one color per value of a column.

    Parameters
    ----------
    X : 2-D array
        Point coordinates; column 0 is drawn on x and column 1 on y.
    A : pandas.DataFrame
        Per-point attributes, row-aligned with X (masks computed on A select rows of X).
    att : str
        Column of A whose values define the groups.
    siz : number
        Marker size, passed to plt.scatter as ``s``.
    cmap : dict
        Mapping value -> color.  When empty, colors are assigned from a fixed
        palette in sorted order of the values (cycling if there are more
        values than palette entries); the string 'nan' is always black.
        The argument is never mutated, so the shared default is safe.
    th : number
        Groups with at most ``th`` points are not drawn.
    alpha : number
        Marker opacity.
    annotate : bool
        If True, write each drawn group's name at the mean of its points.
    legend : bool
        If True, add a legend.
    ncol : int
        Number of legend columns.

    Both axes get the same limits: the overall min/max over the two
    coordinate columns, widened by 5% of each bound's magnitude.
    """
    colors = ['#e6194b', '#3cb44b', '#f58231', '#4363d8', '#911eb4', '#46f0f0', '#f032e6', '#ffe119','#bcf60c', '#fabebe', '#008080', '#e6beff', '#9a6324', '#fffac8', '#800000', '#aaffc3', '#808000', '#ffd8b1', '#000075', '#808080', '#ffffff', '#000000']
    if len(cmap)==0:
        # Build a fresh local mapping; values are visited in sorted order so
        # the color assignment is deterministic.
        cmap = {}
        i = -1
        for val in A.sort_values(att)[att]:
            if val not in cmap:
                if val != 'nan':
                    i += 1
                    cmap[val] = colors[i % len(colors)]
                else:
                    cmap[val] = '#000000'
    for key in cmap:
        mask = (A[att] == key).to_numpy()
        if mask.sum() > th:
            pts = X[mask,:]
            plt.scatter(pts[:,0],pts[:,1],color=cmap[key],label=key,s=siz,alpha=alpha)
            if annotate:
                mx = pts[:,0].mean()
                my = pts[:,1].mean()
                plt.text(mx,my,key, bbox=dict(facecolor='white', alpha=0.75,boxstyle="round"))
    lo = min(X[:,0].min(),X[:,1].min())
    hi = max(X[:,0].max(),X[:,1].max())
    # Pad outward by 5% of each bound's magnitude.  Using abs() matters for a
    # negative lower bound: `lo -= lo*0.05` would move it toward zero and clip points.
    lo -= abs(lo)*0.05
    hi += abs(hi)*0.05
    plt.xlim(lo,hi)
    plt.ylim(lo,hi)
    if legend:
        plt.legend(markerscale=2,framealpha=0.5,ncol=ncol)

In [None]:
siz=2
alpha = 1
# One entry per subplot: (title, obs column to color by, label the groups on
# the plot?, call plt.grid() after switching the axes off?).
panel_specs = [
    ('data overlap', 'data', False, True),
    ('clustering', 'leiden', True, True),
    ('annotations', 'cell_type', False, False),
]
for tissue in tissues:
    att_  = tissue_data_red[tissue].obs
    coordinates = tissue_data_red[tissue].obsm['X_umap']
    # Shared square limits so the three panels of a tissue are directly comparable.
    max_ = coordinates.max().max()
    min_ = coordinates.min().min()
    plt.figure(figsize=(36,12))
    plt.suptitle(tissue)
    for panel_no, (panel_title, column, with_labels, call_grid) in enumerate(panel_specs, start=1):
        plt.subplot(1,3,panel_no)
        plt.title(panel_title,loc='left')
        color(coordinates,att_,column,siz=siz,alpha=alpha,annotate=with_labels)
        # Override the limits set inside color() with the per-tissue ones.
        plt.xlim(min_,max_)
        plt.ylim(min_,max_)
        plt.axis('off')
        if call_grid:
            plt.grid()

### We identify a few clusters where TMS annotations should be adjusted

In [None]:
# Manual corrections, applied in order: (tissue, Leiden clusters, new cell type).
manual_corrections = [
    ('fat BAT', [4,8], 'macrophage'),
    ('fat GAT', [5,8,12], 'macrophage'),
    ('fat GAT', [13], 'plasma b cell'),
    ('fat SCAT', [3,7,10], 'macrophage'),
    ('fat SCAT', [11], 'monocyte'),
    ('marrow', [9], 'b cell'),
    ('pancreas', [12], 'stellate cell'),
    ('pancreas', [10], 'macrophage'),
]
for fix_tissue, fix_clusters, fix_cell_type in manual_corrections:
    tissue_data_red[fix_tissue] = annotate_man(fix_tissue, fix_clusters, fix_cell_type)

In [None]:
# Add a 'cell_type_abbrev' column holding rename() applied to each row's 'cell_type'.
for tissue in tissues:
    a_ = tissue_data_red[tissue].obs
    abbreviated = a_.apply(lambda row: rename(row['cell_type']), axis = 1)
    a_['cell_type_abbrev'] = abbreviated

In [None]:
# Copy the annotated obs table from the 'red' object onto the 'cpm' and 'raw'
# objects of the same tissue, then write all three next to the input files.
for tissue in tissues:
    print(tissue,end = ' ')
    annotated = tissue_data_red[tissue]
    for target in (tissue_data_cpm[tissue], tissue_data_raw[tissue]):
        # Each object receives its own copy of the table.
        target.obs = annotated.obs.copy()
    annotated.write(folder + 'tissue_data_red_annotated_' + tissue + '.h5ad')
    tissue_data_cpm[tissue].write(folder + 'tissue_data_cpm_annotated_' + tissue + '.h5ad')
    tissue_data_raw[tissue].write(folder + 'tissue_data_raw_annotated_' + tissue + '.h5ad')