In [None]:
import pandas as pd
import numpy as np
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
import pathlib as pl

In [None]:
import signaturescoring as ssc

In [None]:
from scipy.sparse import csr_matrix

In [None]:
from statannotations.Annotator import Annotator

In [None]:
def pretty_ax(ax):
    """Apply the notebook's minimal axes styling to ``ax`` in place.

    The right and top spines are hidden, the bottom and left spines are drawn
    with a line width of 1.5, and the tick configuration below is applied to
    both axes and both major and minor ticks.

    Parameters
    ----------
    ax
        Object exposing ``spines`` (indexable by side name) and
        ``tick_params`` -- in this notebook a matplotlib Axes.
    """
    for hidden_side in ("right", "top"):
        ax.spines[hidden_side].set_visible(False)

    for emphasised_side in ("bottom", "left"):
        ax.spines[emphasised_side].set_linewidth(1.5)

    # Tick marks only at the bottom; labels stay on both the bottom and left.
    tick_settings = {
        "axis": "both",
        "which": "both",
        "bottom": True,
        "top": False,
        "left": False,
        "labelbottom": True,
        "labelleft": True,
    }
    ax.tick_params(**tick_settings)

In [None]:
# Directory holding the input files read below (GSE222078_counts.csv.gz and
# highLevelCellTypes.csv). The value is a placeholder to be replaced by the user.
datadir = pl.Path("/add/path/here")

In [None]:
# Load the count table, promote its first (unnamed) column to the index and
# transpose so that the former columns become the rows used as cells below.
counts_raw = pd.read_csv(datadir / "GSE222078_counts.csv.gz")
counts = counts_raw.set_index("Unnamed: 0").T

In [None]:
# Sample label = "s" + the text after the last "_" of each row label of `counts`.
barcode_suffix = counts.index.str.split("_").str[-1]
sample_labels = ("s" + barcode_suffix).to_numpy()
meta = pd.DataFrame({"Sample": sample_labels}, index=counts.index)

In [None]:
# Sample -> patient lookup; s7/s8 share patient P7 and s9/s10 share patient P8.
sample_to_patient = {
    "s1": "P1",
    "s2": "P2",
    "s3": "P3",
    "s4": "P4",
    "s5": "P5",
    "s6": "P6",
    "s7": "P7",
    "s8": "P7",
    "s9": "P8",
    "s10": "P8",
}
meta["Patient"] = meta["Sample"].replace(sample_to_patient)

In [None]:
# Sample -> tissue status lookup; only s7 and s9 are labelled "AN".
sample_to_status = {
    "s1": "Tumor",
    "s2": "Tumor",
    "s3": "Tumor",
    "s4": "Tumor",
    "s5": "Tumor",
    "s6": "Tumor",
    "s7": "AN",
    "s8": "Tumor",
    "s9": "AN",
    "s10": "Tumor",
}
meta["Tumor status"] = meta["Sample"].replace(sample_to_status)

In [None]:
# Round the count table and store it as a sparse CSR matrix.
rounded_counts = counts.round().to_numpy()
X = csr_matrix(rounded_counts)

In [None]:
# Empty per-gene annotation table indexed by the columns of `counts`.
# NOTE(review): the name `genes` is rebound by the later signature-scoring loops.
genes = pd.DataFrame(index=counts.columns)

In [None]:
# Assemble the AnnData object from the sparse matrix, the per-cell metadata
# (`meta`) and the per-gene table (`genes`).
adata = sc.AnnData(X, obs=meta, var=genes)

In [None]:
# Basic QC filtering with scanpy, in place: cells are filtered with
# min_genes=200, then genes with min_cells=3.
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)

In [None]:
# Flag genes whose name starts with "MT-" and compute QC metrics in place,
# using that flag as a QC variable.
adata.var['mt'] = adata.var_names.str.startswith('MT-')  # annotate the group of mitochondrial genes as 'mt'
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

In [None]:
# Histogram (20 bins) of the per-cell total_counts column of adata.obs.
adata.obs.total_counts.hist(bins=20)

In [None]:
# Normalise with target_sum=10000, then apply log1p; both operate on `adata` in place.
sc.pp.normalize_total(adata, target_sum=10000)
sc.pp.log1p(adata)

In [None]:
# PCA with scanpy defaults.
# NOTE(review): PCA is run again after the object is reloaded further down.
sc.tl.pca(adata)

In [None]:
# Persist the preprocessed object to disk.
# NOTE(review): this literal path is independent of `datadir`; it must match
# the path passed to sc.read_h5ad further down.
adata.write("/add/path/here/GSE222078_adata.h5ad")

# Download program signatures

In [None]:
# Directory with one table per signature; the file stem becomes the signature name.
signature_dir = pl.Path("/add/path/here")

# Signature name -> array with the first 100 index entries (genes) of the
# table after removing entries starting with "MT-", "RPS" or "RPL".
full_sigs = {}
# iterdir() yields entries in arbitrary order and also returns directories and
# hidden files (e.g. .DS_Store); sort for reproducibility and skip non-signature entries.
for sig_path in sorted(signature_dir.iterdir()):
    if not sig_path.is_file() or sig_path.name.startswith("."):
        continue
    ranked_genes = pd.read_csv(sig_path, index_col=0).index
    is_informative = ~ranked_genes.str.startswith(("MT-", "RPS", "RPL"))
    # to_numpy() replaces Index.ravel(), which is deprecated as a way to get an ndarray.
    full_sigs[sig_path.stem] = ranked_genes[is_informative][:100].to_numpy()

# Visualizing signatures in the full cohort

In [None]:
# Reload the preprocessed object from the same literal path it was written to.
adata = sc.read_h5ad("/add/path/here/GSE222078_adata.h5ad")

In [None]:
# Cell-type annotation table; its first column is used as the index.
# presumably one row per cell barcode -- TODO confirm against the file
celltypes = pd.read_csv(datadir / "highLevelCellTypes.csv",index_col=0)

In [None]:
# Replace "-" with "." in the annotation index.
# presumably to match the cell names used in `adata` -- TODO confirm
# regex=False makes the literal replacement explicit: pandas < 2.0 defaults to
# regex=True, where "." and "-" only behave literally through a special case.
celltypes.index = celltypes.index.str.replace("-", ".", regex=False)

In [None]:
# Attach the cell-type annotation to adata.obs.
# A plain outer concat produces the union of both indexes, so annotated cells
# that are absent from `adata` (for example removed by the QC filters) would
# add rows or change the row order relative to adata.X. Reindexing to the obs
# index first keeps exactly one row per cell, in the existing order; cells
# without an annotation get NaN.
adata.obs = pd.concat([adata.obs, celltypes.reindex(adata.obs.index)], axis=1)

In [None]:
# Recompute PCA on the reloaded object (it was also run before the object was written).
sc.tl.pca(adata)

In [None]:
# Harmony integration keyed on the "Sample" column, capped at 20 Harmony iterations.
# The result is consumed by the neighbours step via use_rep="X_pca_harmony".
sc.external.pp.harmony_integrate(adata, key="Sample", max_iter_harmony=20)

In [None]:
# Neighbourhood graph built on the Harmony representation.
sc.pp.neighbors(adata, use_rep="X_pca_harmony")

In [None]:
# UMAP embedding of the full cohort (scanpy defaults).
sc.tl.umap(adata)

In [None]:
# Score every loaded signature on the full cohort; each result is requested
# under the name "<signature>_score" and read from adata.obs by the plots below.
# The loop variables are deliberately not called `genes`, so the gene
# annotation DataFrame of that name is not overwritten.
for sig_name, sig_genes in full_sigs.items():
    ssc.score_signature(
        adata=adata,
        gene_list=list(sig_genes),
        method="adjusted_neighborhood_scoring",
        ctrl_size=150,
        score_name=f"{sig_name}_score",
    )

In [None]:
# Score column names for cNMF programmes 1 to 5.
program_ids = range(1, 6)
cnmf_names = [f"cNMF_{program}_score" for program in program_ids]

In [None]:
# UMAP panels, two per row: cell type first, then one panel per cNMF score.
umap_colors = ["celltype", *cnmf_names]
sc.pl.umap(adata, color=umap_colors, ncols=2)

In [None]:
# savefig does not create missing directories, so make sure the target exists.
fig_dir = pl.Path("figures/external")
fig_dir.mkdir(parents=True, exist_ok=True)

# One box plot per cNMF score across cell types on a 3x2 grid.
fig, axs = plt.subplots(3, 2, figsize=(10, 10))
flatax = axs.flatten()
# zip stops after the last score, leaving the sixth axes unused.
for ax, score_col in zip(flatax, cnmf_names):
    sns.boxplot(data=adata.obs, x="celltype", y=score_col, ax=ax)
    ax.spines[['right', 'top']].set_visible(False)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
    # axhline spans the full axes width whatever the x-limits are and, unlike
    # hlines drawn at the current limits, does not take part in autoscaling.
    ax.axhline(y=0, linestyle="dashed", color="grey")
flatax[-1].axis("off")
fig.tight_layout()
fig.savefig(fig_dir / "boxplot_cNMF_score_per_celltype.png", dpi=250, bbox_inches="tight")

In [None]:
# Comparison pairs for the statistical annotation: "Epithelial" against every
# other cell type, in sorted order.
# dropna() keeps unannotated cells (NaN) out of the comparison list; mixing NaN
# with strings would otherwise fail when the values are sorted.
# A separate name is used so the `celltypes` annotation DataFrame stays intact.
other_celltypes = sorted(set(adata.obs["celltype"].dropna()) - {"Epithelial"})
pairs = [("Epithelial", ct) for ct in other_celltypes]

In [None]:
def _annotated_score_boxplot(ax, score_col, obs, comparisons):
    """Draw one score-per-cell-type box plot with Mann-Whitney annotations.

    Parameters
    ----------
    ax
        Axes to draw on.
    score_col
        Name of the score column in ``obs`` plotted on the y axis.
    obs
        Table holding a "celltype" column and ``score_col``.
    comparisons
        List of (group, group) tuples passed to the Annotator as ``pairs``.

    Returns
    -------
    The value returned by ``Annotator.annotate()``.
    """
    sns.boxplot(data=obs, x="celltype", y=score_col, ax=ax)
    ax.spines[['right', 'top']].set_visible(False)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
    ax.set_xlabel("")
    # Dashed zero reference line spanning the full axes width.
    ax.axhline(y=0, linestyle="dashed", color="grey")

    annot = Annotator(ax, pairs=comparisons, data=obs, x="celltype", y=score_col)
    annot.configure(
        test="Mann-Whitney",
        loc="inside",
        text_format="star",
        show_test_name=False,
        verbose=2,
        comparisons_correction=None,  # p-values are not corrected for multiple testing
        fontsize=10,
    )
    annot.apply_test()
    return annot.annotate()


# savefig does not create missing directories, so make sure the target exists.
fig_dir = pl.Path("figures/external")
fig_dir.mkdir(parents=True, exist_ok=True)

fig, axs = plt.subplots(1, 2, figsize=(10, 5))
flatax = axs.flatten()
# Both panels are built by the same helper, so they cannot drift apart.
for panel_ax, score_col in zip(flatax, ["cNMF_2_score", "cNMF_5_score"]):
    # test_results keeps the results of the last panel drawn.
    _, test_results = _annotated_score_boxplot(panel_ax, score_col, adata.obs, pairs)

fig.tight_layout()
fig.savefig(fig_dir / "boxplot_cNMF_score_per_celltype_subset_cNMF2_5.png", dpi=250, bbox_inches="tight")

# Visualizing signatures in carcinoma cells

In [None]:
# Transcription factors of interest: they are removed from the signatures
# before scoring the epithelial subset and used as heat-map columns below.
# NOTE(review): the mTF_score cell further down uses only the first four of these.
mTFs =['KLF5', 'ELF3', 'SMAD3', 'TCF7L2', 'HMGA2', "BNC2"]

In [None]:
# Independent copy restricted to the cells annotated as "Epithelial".
is_epithelial = adata.obs["celltype"].isin(["Epithelial"])
subadata = adata[is_epithelial].copy()

In [None]:
# PCA recomputed on the epithelial subset only.
sc.tl.pca(subadata)

In [None]:
# Harmony integration of the subset, keyed on "Sample".
# NOTE(review): sigma=0.6 is passed here but not in the full-cohort call.
sc.external.pp.harmony_integrate(subadata, key="Sample", max_iter_harmony=20, sigma=0.6)

In [None]:
# Neighbourhood graph of the subset built on the Harmony representation.
sc.pp.neighbors(subadata, use_rep="X_pca_harmony")

In [None]:
# UMAP embedding of the epithelial subset (scanpy defaults).
sc.tl.umap(subadata)

In [None]:
# Re-score every signature on the epithelial subset, with the transcription
# factors listed in `mTFs` removed from each gene list first.
# np.setdiff1d returns the remaining genes sorted and de-duplicated.
# The loop variables are deliberately not called `genes`, so the gene
# annotation DataFrame of that name is not overwritten.
for sig_name, sig_genes in full_sigs.items():
    genes_without_tfs = np.setdiff1d(sig_genes, mTFs)
    ssc.score_signature(
        adata=subadata,
        gene_list=list(genes_without_tfs),
        method="adjusted_neighborhood_scoring",
        ctrl_size=150,
        score_name=f"{sig_name}_score",
    )

In [None]:
# Score a four-gene transcription-factor set on the subset as "mTF_score".
mtf_score_genes = ["KLF5", "ELF3", "SMAD3", "TCF7L2"]
ssc.score_signature(
    adata=subadata,
    gene_list=mtf_score_genes,
    method="adjusted_neighborhood_scoring",
    ctrl_size=150,
    score_name="mTF_score",
)

In [None]:
# Score column names for cNMF programmes 1 to 5 (same list as defined earlier).
program_ids = range(1, 6)
cnmf_names = [f"cNMF_{program}_score" for program in program_ids]

In [None]:
# UMAP of the epithelial subset coloured by each cNMF score, two panels per row, no frame.
sc.pl.umap(subadata, color=cnmf_names,ncols=2, frameon=False)

In [None]:
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as mcolors

# First two Harmony components per cell plus the five cNMF scores, with the
# "_score" suffix dropped from the score column names.
score_to_label = {
    "cNMF_1_score": "cNMF_1",
    "cNMF_3_score": "cNMF_3",
    "cNMF_4_score": "cNMF_4",
    "cNMF_2_score": "cNMF_2",
    "cNMF_5_score": "cNMF_5",
}
harmony_pcs = pd.DataFrame(
    subadata.obsm["X_pca_harmony"][:, :2],
    index=subadata.obs_names,
    columns=["PC1", "PC2"],
)
program_scores = subadata.obs[list(score_to_label)].rename(columns=score_to_label)
X_pca = pd.concat([harmony_pcs, program_scores], axis=1)

def plot_pcs_color(ax, state):
    """Scatter PC1 against PC2 on ``ax``, coloured by one column of ``X_pca``.

    Colours use the diverging 'RdBu_r' map centred on 0, and a matching colour
    bar is attached to the figure that owns ``ax``.

    Parameters
    ----------
    ax
        Axes to draw on.
    state
        Name of the column of the module-level ``X_pca`` frame used for colour.
    """
    values = X_pca[state]
    # TwoSlopeNorm requires vmin < vcenter < vmax and raises otherwise; nudge the
    # limits just past 0 so that all-positive or all-negative scores still plot.
    tiny = 1e-9
    vmin = min(values.min(), -tiny)
    vmax = max(values.max(), tiny)
    normalize = mcolors.TwoSlopeNorm(vcenter=0, vmin=vmin, vmax=vmax)
    colormap = matplotlib.colormaps['RdBu_r']
    sns.scatterplot(
        y=X_pca["PC2"],
        x=X_pca["PC1"],
        c=values,
        s=15,
        norm=normalize,
        cmap=colormap,
        ax=ax
    )
    mappable = cm.ScalarMappable(norm=normalize, cmap=colormap)
    mappable.set_array(values)
    # Use the figure that owns `ax` instead of whatever global `fig` exists at
    # call time, so the colour bar always lands on the right figure.
    ax.figure.colorbar(mappable, ax=ax)
    ax.set_title(state)
    pretty_ax(ax)

# Three side-by-side PC1/PC2 scatter plots coloured by cNMF_3, cNMF_1 and cNMF_4.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(15, 4))
flatax = ax.flatten()

for panel, program in zip(flatax, ("cNMF_3", "cNMF_1", "cNMF_4")):
    plot_pcs_color(panel, program)

fig.tight_layout()
fig.savefig("figures/external/PC_wCNMF_score.svg", bbox_inches="tight", dpi=200)

# Two side-by-side PC1/PC2 scatter plots coloured by cNMF_2 and cNMF_5 (displayed only, not saved).
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
flatax = ax.flatten()

for panel, program in zip(flatax, ("cNMF_2", "cNMF_5")):
    plot_pcs_color(panel, program)

fig.tight_layout()

In [None]:
# Dense per-cell expression of the transcription factors, followed by the
# cNMF_1/3/4 score columns.
tf_values = subadata[:, mTFs].X.toarray()  # toarray() already returns a new dense array
tf_frame = pd.DataFrame(tf_values, index=subadata.obs_names, columns=mTFs)
program_score_cols = ["cNMF_1_score", "cNMF_3_score", "cNMF_4_score"]
TF_expr = pd.concat([tf_frame, subadata.obs[program_score_cols]], axis=1)

In [None]:
# Correlation (DataFrame.corr defaults) of each programme score (rows) with
# each transcription factor (columns).
program_rows = ["cNMF_3_score", "cNMF_1_score", "cNMF_4_score"]
tf_program_corr = TF_expr.corr()
heatmap_df = tf_program_corr.loc[program_rows, mTFs]

In [None]:
# Annotated heat map of the programme / transcription-factor correlations.
# The short row labels follow the row order used to build heatmap_df.
short_row_labels = ["cNMF_3", "cNMF_1", "cNMF_4"]
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 2))
sns.heatmap(
    data=heatmap_df,
    annot=heatmap_df,
    fmt=".2f",
    cmap="vlag",
    center=0,
    ax=ax,
)
ax.set_yticklabels(short_row_labels)
fig.savefig("figures/external/heatmap_cNMF_TF_corr.png", bbox_inches="tight", dpi=300)

In [None]:
# One-row heat map: correlation of mTF_score with the cNMF_3/1/4 scores.
program_cols = ["cNMF_3_score", "cNMF_1_score", "cNMF_4_score"]
df = subadata.obs[program_cols + ["mTF_score"]]
heatmap_df = df.corr().loc[["mTF_score"], program_cols]

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(2, 0.5))
sns.heatmap(data=heatmap_df, annot=heatmap_df, cmap="vlag", center=0, ax=ax)
# Keep the row label horizontal; shorten and tilt the column labels.
ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
ax.set_xticklabels(["cNMF_3", "cNMF_1", "cNMF_4"], rotation=45, ha="right")
fig.savefig("figures/external/heatmap_cNMF_mTFscore_corr.png", bbox_inches="tight", dpi=300)

In [None]:
# Spearman correlation matrix of the cNMF score columns in the epithelial
# subset; shown as the cell output.
subadata.obs[cnmf_names].corr(method="spearman")

In [None]:
# savefig does not create missing directories, so make sure the target exists.
fig_dir = pl.Path("figures/external")
fig_dir.mkdir(parents=True, exist_ok=True)

# Lower-triangle pair grid of the cNMF_1/3/4 scores: KDE contours off the
# diagonal, histograms on the diagonal.
g = sns.PairGrid(
    subadata.obs[["cNMF_1_score", "cNMF_3_score", "cNMF_4_score"]],
    diag_sharey=False,
    corner=True,
)
g.map_lower(sns.kdeplot)
g.map_diag(sns.histplot)
# `PairGrid.fig` is deprecated; `figure` is the supported attribute.
g.figure.savefig(fig_dir / "cNMF_relplot.png", dpi=200, bbox_inches="tight")