In [1]:
# Integration of biopsies and organ donors from 2 studies
import imp
import importlib

import anndata
import numpy as np
import pandas as pd
import scanpy as sc

import lpy
sc.logging.print_header()

scanpy==1.6.0 anndata==0.7.4 umap==0.4.6 numpy==1.18.1 scipy==1.5.3 pandas==1.0.1 scikit-learn==0.22.2 statsmodels==0.11.1 python-igraph==0.8.0 louvain==0.6.1 leidenalg==0.8.2


In [2]:
# Load the integrated dataset (AnnData .h5ad file) that the following cells annotate.
adata = anndata.read_h5ad("N1-integrated_donors.h5ad")

In [3]:
# Re-import the local `lpy` helper module so edits made to it since the first import
# take effect. importlib.reload is used instead of imp.reload because the `imp`
# module is deprecated and no longer exists in Python 3.12+.
importlib.reload(lpy)

# Collapse the Leiden clusters in obs["leiden_scvi_sampl_cc"] into broad compartments.
# A tuple key maps every cluster it lists to the same name. Cluster labels that are
# not listed keep their original value and are appended after the named categories
# (see the helper source printed by doinspect=True).
adata.obs["broad_celltypes"] = lpy.applyFactorRename(
    adata.obs["leiden_scvi_sampl_cc"],
    {
        ("1", "2", "7", "10", "13", "21"): "Epithelial",
        "4": "Endothelial",
        ("11", "18"): "Immune",
        ("3", "8", "9", "16", "17"): "Supporting",
        ("0", "5", "6", "12", "14", "19"): "Stromal",
        ("22", "23", "20", "15"): "Excluded",
    },
    doinspect=True,
)

[35;46;1mRename classes in list[0m[34m
def applyFactorRename(input, dico, doinspect = False):
    if doinspect is True: print("\033[35;46;1mRename classes in list\033[0m\033[34m"); print(inspect.getsource(applyFactorRename));
    import numpy as np
    tmp = np.array(input, dtype=object)
    out = tmp.copy()
    for k,x in dico.items():
        if isinstance(k, tuple):
            for l in k:
                out[tmp == l] = x
        else:
            out[tmp == k] = x
    leftover_categories = set(out)
    leftover_categories = leftover_categories.difference(set(dico.values()))
    categories = list(dico.values())
    categories = categories + list(leftover_categories)
    return(pd.Categorical(out, categories, ordered=True))



In [4]:
# Reorder the "Day" categories for legend display; the table printed by this cell
# shows the resulting order (day8, day16, ..., day26, unknown).
# NOTE(review): the list looks like positions into the *current* category order, so
# re-running this cell on the same kernel would presumably permute the categories a
# second time -- confirm against lpy.reorderCat and run it once per fresh load.
adata.obs["Day"] = lpy.reorderCat(adata.obs["Day"], [8,0,1,2,3,4,5,6,7,9] )

         0
0     day8
1    day16
2    day17
3    day19
4    day20
5    day22
6    day23
7    day25
8    day26
9  unknown


In [5]:
# Set default colour palettes for the annotations by storing a list of hex colours
# under "<annotation>_colors" in adata.uns.
# NOTE(review): presumably each list must have one entry per category of the matching
# obs column, in category order (scanpy plotting convention) -- "Day_colors" has 10
# entries for the 10 Day categories; confirm the counts for the other columns.
adata.uns["BinaryStage_colors"] = ['#AA0000', '#0000FF']
adata.uns["BiopsyType_colors"] = ['#1f77b4', '#ff7f0e']
adata.uns["Day_colors"] = ['#00FFFF', '#00A0FF', '#0080E8', '#006074', '#00A000', '#4B9000', '#968000', '#DD8000', '#FF4000', '#808080']
adata.uns["DonorID_colors"] = ['#a70000', '#e20000', '#4a4a00', '#848400', '#a5a500', '#005917', '#009f29', '#00c734', '#004a75', '#0084d2', '#12a7ff', '#410068', '#7a00c1', '#bb48ff', '#cd79ff']
adata.uns["SampleID_colors"] = ['#800000', '#a70000', '#e70000', '#a76060', '#e28080', '#4a4a00', '#404030', '#848400', '#707040', '#a5a500', '#909060', '#005917', '#009f29', '#00c734', '#004a75', '#0084d2', '#12a7ff', '#410068', '#7a00c1', '#bb48ff', '#cd79ff']
adata.uns["Location_colors"] = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']
adata.uns["broad_celltypes_colors"] = ['#F08129', '#499ABA', '#48B750', '#AAAA66', '#A58A62', '#7979CC', '#888888']


In [6]:
# Finer-grained annotation of the same Leiden clusters. A tuple key assigns every
# cluster it lists to one cell-type name; the dict order becomes the category order.
adata.obs["general_celltype"] = lpy.applyFactorRename(
    adata.obs["leiden_scvi_sampl_cc"],
    {
        "7": "SOX9",
        ("21", "2"): "Lumenal",
        "13": "Glandular",
        "1": "Glandular_secretory",
        "10": "Ciliated",
        "11": "Lymphoid",
        "18": "Myeloid",
        "4": "Endothelial",
        "8": "PV MYH11",
        "9": "PV STEAP4",
        ("3", "16"): "uSMC",
        "17": "Fibroblast C7",
        ("5", "6"): "Fibroblast eS",
        ("0", "14", "12", "19"): "Fibroblast dS",
        ("22", "23", "20", "15"): "Other",
    },
    doinspect=True,
)
# Show how many cells ended up in each cell type as a quick sanity check.
adata.obs["general_celltype"].value_counts()

[35;46;1mRename classes in list[0m[34m
def applyFactorRename(input, dico, doinspect = False):
    if doinspect is True: print("\033[35;46;1mRename classes in list\033[0m\033[34m"); print(inspect.getsource(applyFactorRename));
    import numpy as np
    tmp = np.array(input, dtype=object)
    out = tmp.copy()
    for k,x in dico.items():
        if isinstance(k, tuple):
            for l in k:
                out[tmp == l] = x
        else:
            out[tmp == k] = x
    leftover_categories = set(out)
    leftover_categories = leftover_categories.difference(set(dico.values()))
    categories = list(dico.values())
    categories = categories + list(leftover_categories)
    return(pd.Categorical(out, categories, ordered=True))



Fibroblast dS          25078
filtered               20503
Glandular_secretory    13100
Lumenal                12532
Fibroblast eS          12033
uSMC                    8930
Endothelial             7577
SOX9                    3646
PV MYH11                3414
PV STEAP4               3345
Ciliated                3189
Lymphoid                2311
Other                   1739
Glandular               1720
Fibroblast C7            932
Myeloid                  761
Name: general_celltype, dtype: int64

In [8]:
# Save the annotated object (new obs columns and uns colour palettes) to a new .h5ad file.
adata.write_h5ad("N2-integrated_donors.h5ad")