In [1]:
# Integration of biopsies and organ donors from 2 studies
import anndata, numpy as np, pandas as pd, imp, lpy, scanpy as sc
sc.logging.print_header()
%load_ext rpy2.ipython
%matplotlib inline



scanpy==1.6.0 anndata==0.7.5 umap==0.3.10 numpy==1.18.1 scipy==1.4.1 pandas==1.0.1 scikit-learn==0.22.2 statsmodels==0.11.1 python-igraph==0.8.0 louvain==0.6.1 leidenalg==0.7.0


In [2]:
# Load the integrated AnnData object; the relative path is resolved against the
# notebook's working directory.
adata = anndata.read_h5ad("N6-integrated_donors.h5ad")

In [3]:
# Remove temporary annotations before hosting the object on cellxgene.
# `obs_toremove` lists the `adata.obs` columns that are deleted one by one at
# the end of this cell.
# NOTE(review): both 'leiden_suporting_raw' and 'leiden_supporting_raw' are
# listed; presumably both spellings exist in the input object -- TODO confirm.
obs_toremove = ['batch', 'SampleID',
       '10x kit', 'Treatment',
       'Batch', 'scrublet_pred', 'scrublet_local_pred',
       'scrublet_score', 'scrublet_cluster_score', 'filtered_cells',
       'leiden_scvi_sampl_cc', 'leidenres2_scvi_sampl_cc',
       'leiden_endothelial_raw', 'leiden_immune_raw', 'leidenres3_immune_raw',
       'leiden_stromal_raw', 'leiden_suporting_raw', 'leiden_supporting_raw',
       'leiden_epithelial_raw', 'leiden_epithelial_subsample_raw',
       'leidenres2_epithelial_subsample_raw', 'leidenres2_epithelial_raw',
       'leiden_stromal_subsample_raw', 'leidenres2_stromal_subsample_raw',
       'subcluster_epithelial',
       'subcluster_stromal_balanced',
       'S_score', 'G2M_score', 'leidenres2_stromal_curated',
       'leidenres2_epithelial_curated', 'subcluster_epithelial_balanced_broad',
       'cosine_proj_bulkorg', 'cosine_proj_bulkorg_distance',
       'cosine_logXformedproj_bulkorg',
       'cosine_logXformedproj_bulkorg_distance', 'logist_proj_bulkorg',
       'logist_proj_bulkorg_probability', 'fine_celltypes',
       'cosine_proj_bulkorg_noinhib', 'cosine_proj_bulkorg_noinhib_distance',
       'cosine_logXformedproj_bulkorg_noinhib',
       'cosine_logXformedproj_bulkorg_noinhib_distance',
       'logist_proj_bulkorg_noinhib',
       'logist_proj_bulkorg_noinhib_probability',
       'cosine_proj_bulkorg_hormones', 'cosine_proj_bulkorg_hormones_distance',
       'cosine_logXformedproj_bulkorg_hormones',
       'cosine_logXformedproj_bulkorg_hormones_distance',
       'logist_proj_bulkorg_hormones',
       'logist_proj_bulkorg_hormones_probability', 'cosine_proj_bulkorg_broad',
       'cosine_proj_bulkorg_broad_distance',
       'cosine_logXformedproj_bulkorg_broad',
       'cosine_logXformedproj_bulkorg_broad_distance',
       'logist_proj_bulkorg_broad', 'logist_proj_bulkorg_broad_probability',
       'cosine_proj_bulkorg_hormnoinhib',
       'cosine_proj_bulkorg_hormnoinhib_distance',
       'cosine_logXformedproj_bulkorg_hormnoinhib',
       'cosine_logXformedproj_bulkorg_hormnoinhib_distance',
       'logist_proj_bulkorg_hormnoinhib',
       'logist_proj_bulkorg_hormnoinhib_probability',
       'cosine_proj_bulkorg_hormnoinhib_res2',
       'cosine_proj_bulkorg_hormnoinhib_res2_distance',
       'cosine_logXformedproj_bulkorg_hormnoinhib_res2',
       'cosine_logXformedproj_bulkorg_hormnoinhib_res2_distance',
       'logist_proj_bulkorg_hormnoinhib_res2',
       'logist_proj_bulkorg_hormnoinhib_res2_probability',
       'cosine_proj_bulkorg_strhormnoinhib',
       'cosine_proj_bulkorg_strhormnoinhib_distance',
       'cosine_logXformedproj_bulkorg_strhormnoinhib',
       'cosine_logXformedproj_bulkorg_strhormnoinhib_distance',
       'logist_proj_bulkorg_strhormnoinhib',
       'logist_proj_bulkorg_strhormnoinhib_probability',
       'Day', 'StudyName', 'Women age', 'subcluster_immune', 'Wang_celltype' ]
# Keys of `adata.obsm` that are deleted one by one at the end of this cell.
# NOTE(review): as with the obs columns, both the 'suporting' and 'supporting'
# spellings are listed; presumably both exist in the input -- TODO confirm.
obsm_toremove = ['X_cosproj_bulkorg',
 'X_cosproj_bulkorg_hormones',
 'X_cosproj_bulkorg_noinhib',
 'X_cosproj_bulkorg_strhormnoinhib',
 'X_logistproj_bulkorg',
 'X_logistproj_bulkorg_hormones',
 'X_logistproj_bulkorg_noinhib',
 'X_logistproj_bulkorg_strhormnoinhib',
 'X_lxfcosproj_bulkorg',
 'X_lxfcosproj_bulkorg_hormones',
 'X_lxfcosproj_bulkorg_noinhib',
 'X_lxfcosproj_bulkorg_strhormnoinhib',
 'X_umap_endothelial',
 'X_umap_epithelial',
 'X_umap_epithelial_subsample',
 'X_umap_immune',
 'X_umap_stromal',
 'X_umap_stromal_subsample',
 'X_umap_suporting',
 'X_umap_supporting',
 'endothelial',
 'epithelial',
 'epithelial_curated',
 'epithelial_subsample',
 'immune',
 'scvi_sampl_cc',
 'stromal',
 'stromal_curated',
 'stromal_subsample',
 'suporting',
 'supporting', 'X_Wang_umap', 'X_umap_stromal_curated' , 'X_umap_immune_curated' ]

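# NOTE: the fibroblast labels are not renamed in this cell; that happens in the
# next cell via lpy.applyFactorRename ("Fibroblast eS" -> "eS", "Fibroblast dS" -> "dS").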

# Strip the temporary annotations: first every listed obs column, then every
# listed obsm entry. A missing key raises KeyError.
for annotation_store, doomed_keys in (
    (adata.obs, obs_toremove),
    (adata.obsm, obsm_toremove),
):
    for doomed_key in doomed_keys:
        del annotation_store[doomed_key]

In [4]:
# Build the "main" object: every cell whose broad cell type is not "filtered".
# Take an explicit copy: the statements below write to .obs/.obsm/.uns, and
# doing that on a view makes AnnData copy implicitly on the first write (and
# emit "Trying to set attribute `.obs` of view, copying.").
keep_cells = ~adata.obs["broad_celltypes"].isin(["filtered"]).to_numpy()
adata_main = adata[keep_cells].copy()

# Mapping handed to lpy.applyFactorRename. Only the two fibroblast subtypes
# actually change name ("Fibroblast eS" -> "eS", "Fibroblast dS" -> "dS"); the
# identity entries are kept because the helper builds the (ordered) category
# list from the mapping's values, in this order.
celltype_labels = {
    "SOX9" : "SOX9",
    "Lumenal" : "Lumenal",
    "Glandular" : "Glandular",
    "Ciliated" : "Ciliated",
    "Lymphoid" : "Lymphoid",
    "Myeloid" : "Myeloid",
    "Endothelial ACKR1" : "Endothelial ACKR1",
    "Endothelial SEMA3G" : "Endothelial SEMA3G",
    "PV MYH11" : "PV MYH11",
    "PV STEAP4" : "PV STEAP4",
    "uSMC" : "uSMC",
    "Fibroblast C7" : "Fibroblast C7",
    "Fibroblast eS" : "eS",
    "Fibroblast dS" : "dS",
    "Other" : "Other"
    }
# doinspect=True makes the helper print its own source next to the result.
adata_main.obs["general_celltypes"] = lpy.applyFactorRename(
    adata_main.obs["general_celltypes"], celltype_labels, doinspect=True
)

# The epithelial sub-clustering and its embedding are not part of this export.
del adata_main.obs["subcluster_epithelial_balanced"]
del adata_main.obsm["X_umap_epithelial_curated"]
print(adata_main.obs["general_celltypes"].value_counts())

# Give the exported columns their display names.
adata_main.obs = adata_main.obs.rename(columns={
    "broad_celltypes" : "Broad cell type",
    "general_celltypes" : "Cell type",
    "phase" : "CellCycle Phase",
    "sample_names" : "SampleID",
})

# Move each colour palette to the "<display name>_colors" key that matches the
# renamed column.
# NOTE(review): the "general_celltypes" palette is carried over unchanged even
# though the category list was rebuilt above -- presumably the palette is
# matched to categories by position; confirm the colours still line up.
for old_key, new_key in (
    ("broad_celltypes_colors", "Broad cell type_colors"),
    ("general_celltypes_colors", "Cell type_colors"),
    ("phase_colors", "CellCycle Phase_colors"),
):
    adata_main.uns[new_key] = adata_main.uns.pop(old_key)

adata_main.write_h5ad("Integrated_donors.h5ad")

Trying to set attribute `.obs` of view, copying.


[35;46;1mRename classes in list[0m[34m
def applyFactorRename(input, dico, doinspect = False):
    if doinspect is True: print("\033[35;46;1mRename classes in list\033[0m\033[34m"); print(inspect.getsource(applyFactorRename));
    import numpy as np
    tmp = np.array(input, dtype=object)
    out = tmp.copy()
    for k,x in dico.items():
        if isinstance(k, tuple):
            for l in k:
                out[tmp == l] = x
        else:
            out[tmp == k] = x
    leftover_categories = set(out)
    leftover_categories = leftover_categories.difference(set(dico.values()))
    categories = list(dico.values())
    categories = categories + list(leftover_categories)
    return(pd.Categorical(out, categories, ordered=True))

dS                    24436
Lumenal               14252
Glandular             13100
eS                    12675
uSMC                   7746
Endothelial ACKR1      7188
SOX9                   3646
PV MYH11               3414
PV STEAP4              3345
Ciliate

In [5]:
# Drop the cells whose `subcluster_epithelial_balanced` label is "filtered",
# "Low QC" or "doublets". Copy explicitly: the next cell deletes and renames
# annotations on this object, which should act on an actual AnnData, not a view.
excluded_labels = ["filtered", "Low QC", "doublets"]
keep_cells = ~adata.obs["subcluster_epithelial_balanced"].isin(excluded_labels).to_numpy()
adata = adata[keep_cells].copy()

In [6]:
# Remove the annotations and the embedding that are not exported with the
# epithelial object.
for unused_column in ("broad_celltypes", "general_celltypes"):
    del adata.obs[unused_column]
del adata.obsm["X_umap_scvi_sampl_cc"]

# Re-key each colour palette to the "<display name>_colors" entry matching the
# column names assigned below.
for source_key, target_key in (
    ("phase_colors", "CellCycle Phase_colors"),
    ("subcluster_epithelial_balanced_colors", "Epithelial celltype_colors"),
):
    adata.uns[target_key] = adata.uns[source_key]
    del adata.uns[source_key]

# Give the exported columns their display names, then write the object to disk.
display_names = {
    "subcluster_epithelial_balanced" : "Epithelial celltype",
    "phase" : "CellCycle Phase",
    "sample_names" : "SampleID",
}
adata.obs = adata.obs.rename(columns=display_names)
adata.write_h5ad("Integrated_donors_Epithelial.h5ad")