In [None]:
import pathlib as pl
import pandas as pd
import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt

In [None]:
# Marker gene sets keyed by cell-type name. Each list is scored per cell further
# down, producing one "<cell type>_score" column per key, so the keys double as
# column-name prefixes. Some genes intentionally appear in more than one set
# (e.g. TAGLN, NCAM1).
cell_type_markers = {
    "B": ["BANK1", "CD19", "CD79A", "CD79B", "IGLL5", "MS4A1", "VPREB3"],
    "CAF": ["BGN", "COL1A1", "COL1A2", "FN1", "PDGFRA", "RGS5"],
    "Fibroblast": ["ACTA2", "TAGLN"],
    "Hepatocyte": ["ALB", "HHEX", "CYP2E1", "SERPINA1"],
    "Endothelial": [
        "CD34", "CDH5", "CLDN5", "EGFL7", "PECAM1",
        "PLVAP", "RAMP2", "RAMP3", "TM4SF1", "VWF",
    ],
    "Epithelial": [
        "ACPP", "AR", "CD24", "EPCAM", "KLK2", "KLK3",
        "KLK4", "KRT13", "KRT18", "KRT19", "KRT8",
    ],
    "Basal epithelial": ["KRT5", "KRT14", "TP63"],
    "Macrophage": [
        "C1QA", "CD163", "CD68", "CSF1R", "FCGR1A",
        "MERTK", "MRC1", "MS4A4A", "MSR1",
    ],
    "Mast": ["CPA3", "HDC", "KIT", "RGS1", "TPSAB1", "TPSB2"],
    "NK": [
        "FCGR3A", "FGFBP2", "GNLY", "ITGAM", "KLRB1", "KLRC1",
        "KLRD1", "KLRF1", "NCAM1", "NKG7", "PRF1",
    ],
    "pDC": ["GZMB", "IL3RA", "IRF7", "ITM2C", "LILRA4", "MZB1", "SERPINF1"],
    "T": ["CD247", "CD3D", "CD3E", "CD3G", "SKAP1", "THEMIS", "TRAC"],
    "TAM": ["C1QB", "C1QC", "VSIG4"],
    "Treg": ["FOXP3"],
    "Smooth muscle": ["ACTG2", "CNN1", "MYH11", "TAGLN"],
    "EAC": ["KRAS", "MUC1", "AGR2"],
    "Nerve": ["NRXN1", "NRXN3", "NCAM1", "NRG1"],
}

In [None]:
# Directory holding the input .h5ad file read in the next cell.
# Placeholder value: point this at the real data directory before running.
datapath = pl.Path("/add/path/here/")

In [None]:
# Load the sample's AnnData object from `datapath`.
# NOTE(review): the file name suggests CellBender and Scrublet were already applied upstream — confirm.
adata = sc.read_h5ad(datapath / "CCG1153_4496262_GEX_Cellbender_Scrublet.h5ad")

In [None]:
# QC filtering followed by normalization. The statements are order-dependent:
# the QC metrics must exist before the mitochondrial filter, and the "counts"
# layer must be captured before X is normalized.

# Clear any `.raw` stored in the loaded object.
adata.raw = None

# Keep cells with at least 200 detected genes and genes detected in at least 3 cells.
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)
adata.var['mt'] = adata.var_names.str.startswith('MT-')  # annotate the group of mitochondrial genes as 'mt'
# Adds QC columns in place; `pct_counts_mt`, `log1p_total_counts` (obs) and
# `mean_counts` (var) are read by later cells.
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=True, inplace=True)
# Keep cells with < 20% mitochondrial counts; `.copy()` turns the view into a standalone object.
adata = adata[adata.obs.pct_counts_mt < 20, :].copy()

# Keep a copy of X as it is before normalization.
adata.layers["counts"] = adata.X.copy()
# Scale every cell to 10,000 total counts, then log1p-transform X.
sc.pp.normalize_total(adata, target_sum=10000)
sc.pp.log1p(adata)

In [None]:
# Score every marker set on the normalized data; each set yields an
# obs column named "<cell type>_score".
for cell_type, marker_genes in cell_type_markers.items():
    sc.tl.score_genes(
        adata,
        gene_list=marker_genes,
        score_name=f"{cell_type}_score",
    )

In [None]:
# 50-component PCA; the neighbors step below consumes it as "X_pca".
sc.tl.pca(adata, n_comps=50)

In [None]:
# Build the neighbor graph on the PCA representation (other parameters left at scanpy defaults).
sc.pp.neighbors(adata, use_rep="X_pca")

In [None]:
# Compute the UMAP embedding used by the sc.pl.umap plots below.
sc.tl.umap(adata)

In [None]:
# Leiden clustering at resolution 0.7. The resulting cluster ids ("leiden" column)
# are the keys of the manual annotation mapping further down, so changing the
# resolution (or anything upstream) requires revisiting that mapping.
sc.tl.leiden(adata, resolution=0.7)

In [None]:
# UMAP colored by the basal-epithelial marker score produced by the scoring loop.
sc.pl.umap(adata, color=["Basal epithelial_score"])

In [None]:
# UMAP panels for two QC metrics and the Leiden clusters, laid out two per row.
qc_and_cluster_keys = ["log1p_total_counts", "pct_counts_mt", "leiden"]
sc.pl.umap(adata, color=qc_and_cluster_keys, ncols=2)

In [None]:
# One violin plot per marker score, split by Leiden cluster, each on its own
# figure; these plots inform the manual cluster annotation.
for score_key in (f"{cell_type}_score" for cell_type in cell_type_markers):
    _, violin_ax = plt.subplots(1, 1)
    sc.pl.violin(adata, keys=[score_key], groupby="leiden", ax=violin_ax)

In [None]:
# Manually assigned cell-type label for each Leiden cluster, listed in cluster
# order (list position == cluster id). These labels are only valid for the
# clustering produced above.
_cluster_labels = [
    "Fibroblast/CAF",  # 0
    "Endothelial",     # 1
    "Fibroblast/CAF",  # 2
    "Endothelial",     # 3
    "Fibroblast/CAF",  # 4
    "Epithelial",      # 5
    "Epithelial",      # 6
    "Smooth muscle",   # 7
    "Epithelial",      # 8
    "Smooth muscle",   # 9
    "Macrophage",      # 10
    "Epithelial",      # 11
    "Endothelial",     # 12
    "Macrophage",      # 13
    "T/NK",            # 14
    "Epithelial",      # 15
    "Nerve",           # 16
    "Fibroblast/CAF",  # 17
]

# Leiden cluster ids are strings, hence the str() keys.
manual_annotations = {
    str(cluster_id): label for cluster_id, label in enumerate(_cluster_labels)
}

In [None]:
# Translate Leiden cluster ids into the manually curated cell-type labels.
#
# `Series.replace` on a categorical column is deprecated (pandas >= 2.2) when it
# changes the categories, and here several clusters collapse onto one label.
# The labels are therefore built with an explicit lookup instead.
# Clusters missing from `manual_annotations` keep their cluster id as label,
# which is what `replace` did.
leiden_ids = adata.obs["leiden"].astype(str)
celltype_values = leiden_ids.map(
    lambda cluster_id: manual_annotations.get(cluster_id, cluster_id)
)

# Order categories by first appearance when walking the clusters in category
# order, so legends and grouped plots keep the ordering `replace` produced.
ordered_cluster_ids = adata.obs["leiden"].astype("category").cat.categories.astype(str)
celltype_order = list(
    dict.fromkeys(manual_annotations.get(cid, cid) for cid in ordered_cluster_ids)
)

adata.obs["celltype"] = celltype_values.astype(pd.CategoricalDtype(celltype_order))

In [None]:
# Rank genes per annotated cell type (test method and reference left at scanpy defaults).
sc.tl.rank_genes_groups(adata, groupby="celltype")

In [None]:
# Heatmap of the top 4 ranked genes per cell type, plotted from X rather than .raw,
# with axes swapped and the color scale capped at 3.
sc.pl.rank_genes_groups_heatmap(adata, n_genes=4, use_raw=False, swap_axes=True, vmax=3, cmap='bwr')

In [None]:
import infercnvpy as cnv

In [None]:
# Gene position table indexed by gene symbol, restricted to genes present in
# `adata`. The first row is kept for any duplicated symbol, and "seqname" is
# renamed to "chromosome".
gencode_df = (
    pd.read_csv("/add/path/here/gencode_v41_positions.csv", index_col=0)
    .set_index("gene_name")
    .loc[lambda positions: ~positions.index.duplicated()]
    .loc[lambda positions: positions.index.intersection(adata.var.index)]
    .rename(columns={"seqname": "chromosome"})
)

In [None]:
# Attach the genomic position columns to `adata.var`.
#
# A left join on the gene index is used instead of `pd.concat(axis=1)`:
#   * it guarantees the row order and length of `adata.var` are preserved, so
#     var names stay aligned with the columns of X;
#   * genes without a position simply get NaN in the new columns;
#   * it tolerates a non-unique var index, which `concat` rejects.
# Columns already present from an earlier run of this cell are dropped first,
# so re-running the cell does not create duplicated column names.
stale_position_columns = adata.var.columns.intersection(gencode_df.columns)
adata.var = adata.var.drop(columns=stale_position_columns).join(gencode_df, how="left")

In [None]:
# Restrict CNV inference to genes whose mean count (from the QC metrics) is at least 0.1.
expressed_gene_mask = adata.var["mean_counts"] >= 0.1
subadata = adata[:, expressed_gene_mask].copy()

In [None]:
# Cell types used as the reference (baseline) for CNV inference.
reference_celltypes = ["Fibroblast/CAF", "Endothelial", "Smooth muscle"]

# Infer CNV profiles relative to the reference cells using a 200-gene window.
cnv.tl.infercnv(
    subadata,
    reference_key="celltype",
    reference_cat=reference_celltypes,
    window_size=200,
)

In [None]:
# Heatmap of the inferred CNV profiles, grouped by annotated cell type.
cnv.pl.chromosome_heatmap(subadata, groupby="celltype")

In [None]:
# Cluster cells by their CNV profile using infercnvpy's own PCA -> neighbors ->
# Leiden / UMAP chain, then compute a CNV score. The "cnv_leiden" and
# "cnv_score" results are used by the plots below. Keep this order: each step
# builds on the previous one.
cnv.tl.pca(subadata)
cnv.pp.neighbors(subadata)
cnv.tl.leiden(subadata)
cnv.tl.umap(subadata)
cnv.tl.cnv_score(subadata)

In [None]:
# Same CNV heatmap, now grouped by CNV-based Leiden cluster with a dendrogram over the groups.
cnv.pl.chromosome_heatmap(subadata, groupby="cnv_leiden", dendrogram=True)

In [None]:
# 2x2 figure of the CNV-based UMAP: CNV clusters, CNV score and cell type.
# The fourth panel is unused and hidden.
fig, axes = plt.subplots(2, 2, figsize=(11, 11))
ax_clusters, ax_score, ax_celltype, ax_unused = axes.ravel()
ax_unused.axis("off")

# show=False on the first two panels defers rendering until the last call.
cnv.pl.umap(
    subadata,
    color="cnv_leiden",
    legend_loc="on data",
    legend_fontoutline=2,
    ax=ax_clusters,
    show=False,
)
cnv.pl.umap(subadata, color="cnv_score", ax=ax_score, show=False)
cnv.pl.umap(subadata, color="celltype", ax=ax_celltype)

In [None]:
# Output directory (placeholder: set before running; it must already exist).
resdir = pl.Path("/add/path/here/")

# Persist the annotated object.
# NOTE(review): only `adata` is written; the CNV results computed on `subadata`
# (e.g. "cnv_leiden", "cnv_score") are not part of this file — confirm that is intended.
adata.write(resdir / "CCG1153_4496262.h5ad")