In [None]:
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import seaborn as sns
import pathlib as pl

import cell2location

from matplotlib import rcParams
# Use font type 42 (TrueType) for PDF output so that text in saved PDFs is plotted correctly.
rcParams['pdf.fonttype'] = 42



def get_preprocessed_sample(sample_path: pl.Path, min_counts: int, pct_mt: int, min_cells: int) -> sc.AnnData:
    """Load one Visium sample and apply basic spot- and gene-level QC filters.

    Parameters
    ----------
    sample_path
        Directory handed to ``sc.read_visium``.
    min_counts
        Passed to ``sc.pp.filter_cells`` as ``min_counts``.
    pct_mt
        Exclusive upper bound on ``pct_counts_mt``; observations at or above
        this value are dropped.
    min_cells
        Passed to ``sc.pp.filter_genes`` as ``min_cells``.

    Returns
    -------
    sc.AnnData
        The filtered sample as a standalone object (not a view).
    """
    adata = sc.read_visium(path=sample_path)

    adata.var_names_make_unique()
    # Flag mitochondrial genes by their "MT-" name prefix so that
    # calculate_qc_metrics adds a `pct_counts_mt` column to `adata.obs`.
    adata.var["mt"] = adata.var_names.str.startswith("MT-")
    sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], inplace=True)

    # Force integer spatial coordinates.
    adata.obsm["spatial"] = adata.obsm["spatial"].astype(int)

    sc.pp.filter_cells(adata, min_counts=min_counts)
    # Boolean indexing yields a view of the parent object; take an explicit copy
    # so that the in-place gene filter below operates on a real AnnData instead
    # of triggering an implicit copy of the view.
    adata = adata[adata.obs["pct_counts_mt"] < pct_mt].copy()
    print(f"#cells after MT filter: {adata.n_obs}")
    sc.pp.filter_genes(adata, min_cells=min_cells)

    return adata

In [None]:
# Placeholder path to the SpaceRanger output directory; replace with the real location.
# NOTE(review): not referenced by any of the cells visible here — confirm it is used later.
spatial_dir = pl.Path("/add/path/here/SpaceRanger_output/")

# Get single-cell reference

In [None]:
# Load the single-cell reference and the table of refined per-cell annotations
# (the first CSV column is used as the index).
ad_sc = sc.read_h5ad("/add/path/here/full_cohort.h5ad")

refined_annotations = pd.read_csv("/add/path/here/refined_annotations.csv", index_col=0)
refined_annotations.columns = ["refined_annotations"]

# Mapping from refined annotation label to the coarser group used as the
# signature label. Labels that are not keys of this mapping are left unchanged
# by `.replace` below.
highlevel_refined = {
    "Hepatocyte": "Epithelial",
    "Carcinoma": "Carcinoma",
    "Fibroblast": "Fibroblast",
    "Quiescent endothelial cells": "Endothelial",
    "Smooth muscle": "Muscle",
    "Skeletal muscle": "Muscle",
    "TAM2": "Myeloid",
    "TAM3": "Myeloid",
    "TCD4": "Lymphoid",
    "Inflammatory CAF": "Fibroblast",
    "Adipose CAF": "Fibroblast",
    "HGF-CAF": "Fibroblast",
    "TAM1": "Myeloid",
    "Myeloid-HighMT": "Unknown/technical",
    "Angiogenic EC": "Endothelial",
    "Quiescent EC": "Endothelial",
    "Venous EC": "Endothelial",
    "TCD8": "Lymphoid",
    "B": "Lymphoid",
    "DC": "Myeloid",
    "Hepatic EC": "Endothelial",
    "Kupffer cells": "Myeloid",
    "NK": "Lymphoid",
    "Treg": "Lymphoid",
    "StrMus-HighMT": "Unknown/technical",
    "T-HighMT": "Unknown/technical",
    "Mast": "Myeloid",
    "Adipocytes": "Stromal/Muscle",
    "Endo-HighMT": "Unknown/technical",
}

# Attach the annotations aligned on the observation index. Reindexing to
# `ad_sc.obs_names` keeps exactly the rows (and order) of `ad_sc`: entries in
# the CSV that are absent from `ad_sc` are ignored, and observations without an
# annotation get NaN. An outer-join concat would instead change the number of
# rows of `obs` whenever the two indices differ.
ad_sc.obs["refined_annotations"] = (
    refined_annotations["refined_annotations"].reindex(ad_sc.obs_names)
)

ad_sc.obs["highlevel_refined"] = ad_sc.obs["refined_annotations"].replace(highlevel_refined)

# Drop the groups that should not contribute reference signatures, then use
# the `counts` layer as the expression matrix.
excluded_groups = ["Epithelial", "Nerve/adrenal", "Stromal/Muscle", "Unknown/technical"]
ad_sc = ad_sc[~ad_sc.obs["highlevel_refined"].isin(excluded_groups)].copy()
ad_sc.X = ad_sc.layers["counts"].copy()

In [None]:
# Display the dimensions of the reference after the group filter above.
ad_sc.shape

In [None]:
# Select genes with cell2location's helper using the cutoffs below, then
# restrict the reference to those genes.
from cell2location.utils.filtering import filter_genes

selected = filter_genes(
    ad_sc,
    cell_count_cutoff=5,
    cell_percentage_cutoff2=0.03,
    nonz_mean_cutoff=1.12,
)

# Subsetting returns a view; copy so that ad_sc is a standalone object.
ad_sc = ad_sc[:, selected].copy()

# Estimate reference signatures with negative binomial (NB) regression

In [None]:
# Output directory for the trained regression model and the exported reference AnnData
# (placeholder path; replace with the real location).
ref_run_name = "/add/path/here/Cell2Location_results/reference_signatures"

In [None]:
# Register the reference AnnData with the regression model:
#   batch_key  -> 10X reaction / sample / batch
#   labels_key -> cell type, the covariate used for constructing signatures
cell2location.models.RegressionModel.setup_anndata(
    adata=ad_sc,
    batch_key='sample_id',
    labels_key='highlevel_refined',
)

In [None]:
# Create the regression model on the reference AnnData registered via setup_anndata.
from cell2location.models import RegressionModel
mod = RegressionModel(ad_sc)

# Print the registered AnnData setup as a sanity check before training.
mod.view_anndata_setup()

In [None]:
# Train the regression model for at most 200 epochs without using a GPU.
# NOTE(review): no random seed is set in the cells visible here, so repeated
# runs may not reproduce exactly — confirm whether a seed is set elsewhere.
mod.train(max_epochs=200, use_gpu=False)

In [None]:
# Persist the trained model under the run directory, replacing any previous save.
mod.save(ref_run_name, overwrite=True)

In [None]:
# Plot the training history.
# NOTE(review): the argument 20 presumably skips the first 20 epochs of the curve — confirm against the cell2location API.
mod.plot_history(20)

In [None]:
# Export a summary of the regression model's posterior distribution into ad_sc
# (sampling settings: num_samples=1000, batch_size=2500, no GPU).
# A later cell reads the resulting `means_per_cluster_mu_fg*` entries from ad_sc.
ad_sc = mod.export_posterior(
    ad_sc, sample_kwargs={'num_samples': 1000, 'batch_size': 2500, 'use_gpu': False}
)

# Save the AnnData object with results inside the run directory and display the path.
adata_file = f"{ref_run_name}/sc.h5ad"
ad_sc.write(adata_file)
adata_file

In [None]:
# Show the model's built-in quality-control plots.
mod.plot_QC()

In [None]:
# Collect the estimated expression of every gene in each cluster into one table.
factor_names = ad_sc.uns['mod']['factor_names']
signature_columns = [f'means_per_cluster_mu_fg_{name}' for name in factor_names]

# The estimates are read from `varm` when present there, otherwise from `var`.
if 'means_per_cluster_mu_fg' in ad_sc.varm.keys():
    signature_source = ad_sc.varm['means_per_cluster_mu_fg']
else:
    signature_source = ad_sc.var

inf_aver = signature_source[signature_columns].copy()
# Replace the prefixed column names with the plain factor names.
inf_aver.columns = factor_names
inf_aver.iloc[0:5, 0:5]

In [None]:
# Write the per-cluster expression table to CSV (placeholder path; replace with the real location).
inf_aver.to_csv("/add/path/here/Cell2Location_results/mean-expression-per-celltype.csv")