In [1]:
import sys
print(sys.version)

import pandas as pd
import numpy as np
import os
import scanpy as sc
import scvelo as scv
import scirpy as ir
import matplotlib.pyplot as plt
from matplotlib import rcParams
import scanpy.external as sce
import seaborn as sns
import matplotlib.ticker as mtick

# Use Arial as the sans-serif font in every matplotlib figure.
plt.rcParams['font.sans-serif']=['Arial']
# scanpy logging level 3 (NOTE(review): per the scanpy docs this is the "hints" level).
sc.settings.verbosity = 3  
# Default scanpy figure style: small 3x3 panels at 120 dpi, 8 pt font,
# no axes frame, white background.
sc.settings.set_figure_params(dpi=120,fontsize=8, frameon=False, figsize=(3, 3), facecolor='white')


KeyboardInterrupt: 

In [None]:
def preprocessing(adata, filter_cells, barc = '-1', *,
                  min_genes=400, max_genes=8000,
                  min_counts=500, max_counts=50000,
                  max_pct_mt=10, max_pct_hb=5,
                  min_cells_frac=0.001):
    """Apply gene/cell QC to one sample and drop externally flagged barcodes.

    Steps:
      1. Remove genes detected in fewer than ``min_cells_frac`` of the cells
         (done in place on ``adata`` by ``sc.pp.filter_genes``).
      2. Flag gene families in ``adata.var`` ('mt', 'rb', 'hb', 'hsp') and
         compute per-cell QC metrics for them.
      3. Keep cells whose gene count, total count, mitochondrial percentage
         and 'hb' percentage fall strictly inside the given bounds.
      4. Rewrite the trailing ``-1`` of every barcode to ``barc`` and drop the
         cells whose rewritten barcode appears in ``filter_cells``.

    Parameters
    ----------
    adata
        AnnData object for a single sample. Gene filtering and QC annotation
        modify this object in place.
    filter_cells
        Collection of barcodes (already carrying the per-sample suffix) to
        remove.
    barc
        Suffix that replaces the trailing ``-1`` before matching against
        ``filter_cells``. The stored ``obs_names`` are not renamed.
    min_genes, max_genes
        Exclusive bounds on ``n_genes_by_counts``.
    min_counts, max_counts
        Exclusive bounds on ``total_counts``.
    max_pct_mt, max_pct_hb
        Exclusive upper bounds on ``pct_counts_mt`` / ``pct_counts_hb``.
    min_cells_frac
        Fraction of cells in which a gene must be detected to be kept.

    Returns
    -------
    A new AnnData object (a copy, not a view) containing the retained cells.
    """
    sc.pp.filter_genes(adata, min_cells=min_cells_frac*len(adata.obs))

    adata.var['mt'] = adata.var_names.str.startswith('MT-')
    adata.var['rb'] = adata.var_names.str.contains('^RP[SL]')
    # NOTE(review): this pattern only matches names whose third character is
    # A, P or S, so a name such as 'HBB' is not flagged — confirm that this is
    # the intended gene set.
    adata.var['hb'] = adata.var_names.str.contains('^HB[APS]')
    adata.var['hsp'] = adata.var_names.str.contains('^HSP')
    sc.pp.calculate_qc_metrics(adata, qc_vars=['mt','rb','hb','hsp'], percent_top=None, log1p=False, inplace=True)

    # One combined mask instead of six chained view subsets; the metrics are
    # not recomputed between filters, so the result is the same set of cells.
    obs = adata.obs
    keep = (
        (obs.n_genes_by_counts > min_genes)
        & (obs.n_genes_by_counts < max_genes)
        & (obs.total_counts > min_counts)
        & (obs.total_counts < max_counts)
        & (obs.pct_counts_mt < max_pct_mt)
        & (obs.pct_counts_hb < max_pct_hb)
    )
    adata = adata[keep.to_numpy(), :]
    pre_cell = adata.n_obs

    # Anchor the replacement to the end of the barcode and state `regex`
    # explicitly so the result does not depend on the pandas default.
    cell_names = adata.obs_names.str.replace('-1$', barc, regex=True)
    # Materialise the result so callers get a real object rather than a view.
    adata = adata[~cell_names.isin(filter_cells)].copy()
    after_cell = adata.n_obs
    print("{0} cells filtered by double and mix".format(pre_cell-after_cell) )
    return adata

def normalization(adata, n_top = 5000):
    """Normalise, log-transform, regress out QC covariates and run PCA.

    Works in place on ``adata`` and also returns it:
      * library-size normalisation to 1e4 counts per cell, then ``log1p``;
      * highly variable gene annotation (``cell_ranger`` flavour, ``n_top``
        genes) — the matrix is not subset to those genes;
      * regression of the four QC covariates listed below;
      * PCA with the ``arpack`` solver.

    Parameters
    ----------
    adata
        AnnData object whose ``obs`` already holds the QC columns named below.
    n_top
        Number of highly variable genes to annotate.
    """
    qc_covariates = [
        'total_counts',
        'n_genes_by_counts',
        'pct_counts_mt',
        'pct_counts_hsp',
    ]

    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(adata, n_top_genes=n_top, flavor='cell_ranger')
    sc.pp.regress_out(adata, qc_covariates)
    sc.tl.pca(adata, svd_solver='arpack')
    return adata

def dimensionalReduc(adata):
    """Build the neighbour graph, run PAGA and compute a PAGA-initialised UMAP.

    The original body called ``tl.paga`` / ``pl.paga`` / ``tl.umap``, but only
    ``scanpy`` itself is imported (as ``sc``), so those names raised
    ``NameError``. They are now reached through ``sc``.

    Parameters
    ----------
    adata
        AnnData object, modified in place. The neighbour search uses 40
        principal components.
        NOTE(review): ``sc.tl.paga`` is called without ``groups``; confirm the
        caller has stored a clustering in ``adata.obs`` beforehand.

    Returns
    -------
    The same ``adata`` object.
    """
    sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)
    sc.tl.paga(adata)
    sc.pl.paga(adata, plot=False)  # remove `plot=False` if you want to see the coarse-grained graph
    sc.tl.umap(adata, init_pos='paga')
    return adata

## All cells

In [None]:
# Root folder holding one Cell Ranger output directory per sample.
CELLRANGER_DIR = '/share2/pub/zhangyr/zhangyr/cooperation/SJTU/ESCC/cellranger_v6'


def _read_filtered_matrix(sample_id):
    """Read the filtered feature-barcode matrix of one Cell Ranger sample."""
    # The path string is built exactly as it was written out before
    # (including the trailing slash), so the resolved location is unchanged.
    matrix_dir = '{0}/{1}/outs/filtered_feature_bc_matrix/'.format(CELLRANGER_DIR, sample_id)
    return sc.read_10x_mtx(matrix_dir, cache=True)


# Sample folders are named <patient>_1_0 (bound to *_pre_ori) and
# <patient>_1_1 (bound to *_post_ori).
A_pre_ori, A_post_ori = _read_filtered_matrix('A_1_0'), _read_filtered_matrix('A_1_1')
B_pre_ori, B_post_ori = _read_filtered_matrix('B_1_0'), _read_filtered_matrix('B_1_1')
C_pre_ori, C_post_ori = _read_filtered_matrix('C_1_0'), _read_filtered_matrix('C_1_1')
D_pre_ori, D_post_ori = _read_filtered_matrix('D_1_0'), _read_filtered_matrix('D_1_1')
I_pre_ori, I_post_ori = _read_filtered_matrix('I_1_0'), _read_filtered_matrix('I_1_1')
R_pre_ori, R_post_ori = _read_filtered_matrix('R_1_0'), _read_filtered_matrix('R_1_1')
N_pre_ori, N_post_ori = _read_filtered_matrix('N_1_0'), _read_filtered_matrix('N_1_1')

In [None]:
# Barcodes to discard: first column of `doublet` plus the 'Barcode' column of
# `mix`. Built once and shared by every call; `preprocessing` only reads it.
# NOTE(review): `doublet` and `mix` are not defined in any cell visible above —
# they must be loaded before this cell for a fresh-kernel run to succeed.
cells_to_drop = list(doublet.iloc[:,0])+list(mix['Barcode'])

# Each sample gets its own barcode suffix ('-1' ... '-14') so its barcodes can
# be matched against the entries of `cells_to_drop`.
A_pre = preprocessing(A_pre_ori, cells_to_drop, '-1')
A_post = preprocessing(A_post_ori, cells_to_drop, '-2')
B_pre = preprocessing(B_pre_ori, cells_to_drop, '-3')
B_post = preprocessing(B_post_ori, cells_to_drop, '-4')
C_pre = preprocessing(C_pre_ori, cells_to_drop, '-5')
C_post = preprocessing(C_post_ori, cells_to_drop, '-6')
D_pre = preprocessing(D_pre_ori, cells_to_drop, '-7')
D_post = preprocessing(D_post_ori, cells_to_drop, '-8')
I_pre = preprocessing(I_pre_ori, cells_to_drop, '-9')
I_post = preprocessing(I_post_ori, cells_to_drop, '-10')
R_pre = preprocessing(R_pre_ori, cells_to_drop, '-11')
R_post = preprocessing(R_post_ori, cells_to_drop, '-12')
N_pre = preprocessing(N_pre_ori, cells_to_drop, '-13')
N_post = preprocessing(N_post_ori, cells_to_drop, '-14')

In [None]:
def _merge_pre_post(pre, post):
    """Concatenate one patient's two samples, labelling them in obs['treat']."""
    return pre.concatenate(post, batch_key = 'treat', batch_categories=['Pre', 'Post'])


# One merged object per patient.
A_con = _merge_pre_post(A_pre, A_post)
B_con = _merge_pre_post(B_pre, B_post)
C_con = _merge_pre_post(C_pre, C_post)
D_con = _merge_pre_post(D_pre, D_post)
I_con = _merge_pre_post(I_pre, I_post)
R_con = _merge_pre_post(R_pre, R_post)
N_con = _merge_pre_post(N_pre, N_post)

In [None]:
# Stack the per-patient objects; obs['patient'] records which one each cell
# came from.
other_patients = [B_con,C_con, D_con, I_con, R_con, N_con]
patient_labels = ['A', 'B','C','D','I','R','N']
adata_con = A_con.concatenate(other_patients,
                              batch_key = 'patient',
                              batch_categories=patient_labels)

# Combined label of the form '<treat>_<patient>', one value per sample.
treat_col = adata_con.obs['treat']
patient_col = adata_con.obs['patient']
adata_con.obs['treat_patient'] = treat_col.str.cat(patient_col,sep='_')

In [None]:
# BBKNN builds its graph from a PCA representation, but no visible cell runs
# `normalization` (normalise -> log1p -> HVG -> regress_out -> PCA) on the
# merged object. Run it here; the guard skips the step if a PCA is already
# stored, which also keeps the cell safe to re-run.
if 'X_pca' not in adata_con.obsm:
    adata_con = normalization(adata_con)

# Batch-balanced neighbour graph with one batch per sample (treat x patient).
sc.external.pp.bbknn(adata_con, batch_key='treat_patient',neighbors_within_batch=3, trim=100)
sc.tl.umap(adata_con)
sc.tl.leiden(adata_con,resolution=0.3)
sc.pl.umap(adata_con, color=['patient', 'treat', 'leiden'])

In [None]:
# Marker genes grouped by the lineage they are used to identify.
MARKERS_BY_LINEAGE = [
    ('T cell',           ["CD3D", "CD2", "CD3E", "CD3G"]),
    ('B cell',           ["CD79A", 'MS4A1', "CD79B", "JCHAIN"]),
    ('fibroblast',       ["DCN", "COL1A1", "COL1A2", "FN1"]),
    ('endothelial cell', ["CCL14", "VWF", "PECAM1", "ENG"]),
    ('pericyte',         ["RGS5", "MCAM", "ACTA2", "MYH11"]),
    ('myeloid cell',     ["LYZ", "CD14", "GCA", "HPGDS"]),
    ('epithelial cell',  ['KRT5', 'SFN', 'KRT14', 'EPCAM']),
]

# Flat list, in the order above, used to colour the UMAP one gene per panel.
immu_markers = [gene for _lineage, genes in MARKERS_BY_LINEAGE for gene in genes]
sc.pl.umap(adata_con, color=immu_markers)

## T cells

## B cells

## Myeloid cells