In [None]:
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import palettable

import pathlib as pl

from tqdm.notebook import tqdm

In [None]:
import scib

from scipy.stats import fisher_exact

In [None]:
def pretty_ax(ax):
    """Reduce an axes to thick bottom/left spines without any tick labels.

    The right and top spines are hidden, only the bottom tick marks stay
    enabled, the tick labels of both axes are switched off, and the bottom
    and left spines are drawn with a line width of 1.5.
    """
    for hidden_side in ("right", "top"):
        ax.spines[hidden_side].set_visible(False)

    tick_settings = {
        "axis": "both",
        "which": "both",
        "bottom": True,
        "top": False,
        "left": False,
        "labelbottom": False,
        "labelleft": False,
    }
    ax.tick_params(**tick_settings)

    for thick_side in ("bottom", "left"):
        ax.spines[thick_side].set_linewidth(1.5)

In [None]:
def pretty_ax_wlabels(ax):
    """Reduce an axes to thick bottom/left spines while keeping tick labels.

    The right and top spines are hidden, only the bottom tick marks stay
    enabled, the tick labels of both axes are shown, and the bottom and left
    spines are drawn with a line width of 1.5.
    """
    for hidden_side in ("right", "top"):
        ax.spines[hidden_side].set_visible(False)

    tick_settings = {
        "axis": "both",
        "which": "both",
        "bottom": True,
        "top": False,
        "left": False,
        "labelbottom": True,
        "labelleft": True,
    }
    ax.tick_params(**tick_settings)

    for thick_side in ("bottom", "left"):
        ax.spines[thick_side].set_linewidth(1.5)

# Load data

In [None]:
# Load the full-cohort AnnData object; the placeholder path must be replaced with the real location.
adata = sc.read_h5ad("/add/path/here/full_cohort.h5ad")

In [None]:
# Refined annotation table, indexed by the first CSV column; its single data
# column is named "refined_annotations".
refined_annotations = pd.read_csv(
    "/add/path/here/refined_annotations.csv",
    index_col=0,
).set_axis(["refined_annotations"], axis=1)

In [None]:
# Sample identifiers listed in the order of their patient labels P1..P10.
_sample_ids_in_patient_order = (
    "CCG1153_4496262",
    "CCG1153_6640539",
    "CCG1153_4411",
    "Aguirre_EGSFR0074",
    "Aguirre_EGSFR0148",
    "Aguirre_EGSFR1732",
    "Aguirre_EGSFR0128",
    "Aguirre_EGSFR1938",
    "Aguirre_EGSFR1982",
    "Aguirre_EGSFR2218",
)

# Maps each sample identifier to its short patient label.
patient_id_mapping = {
    sample_id: f"P{rank}"
    for rank, sample_id in enumerate(_sample_ids_in_patient_order, start=1)
}

In [None]:
colorlist = palettable.colorbrewer.qualitative.Dark2_8.mpl_colors
colorlistbis = palettable.colorbrewer.qualitative.Paired_3.mpl_colors

# Samples coloured with the eight Dark2 colours, in palette order.
_dark2_samples = [
    'Aguirre_EGSFR1982',
    "Aguirre_EGSFR2218",
    "CCG1153_4411",
    "Aguirre_EGSFR1938",
    "Aguirre_EGSFR0074",
    "Aguirre_EGSFR0128",
    "Aguirre_EGSFR1732",
    "Aguirre_EGSFR0148",
]
# Samples coloured with the first two Paired colours.
_paired_samples = ["CCG1153_4496262", "CCG1153_6640539"]

# Colour per sample identifier, plus a neutral colour for the "NA" label.
colormapping_pat = dict(zip(_dark2_samples, colorlist))
colormapping_pat.update(zip(_paired_samples, colorlistbis))
colormapping_pat["NA"] = "whitesmoke"

# Same colours keyed by patient label (via patient_id_mapping).
colormapping_pat_bis = {
    patient_label: colormapping_pat[sample_id]
    for sample_id, patient_label in patient_id_mapping.items()
}
colormapping_pat_bis["NA"] = "whitesmoke"

In [None]:
colorlist = palettable.colorbrewer.qualitative.Set1_7.mpl_colors

# Colour per cNMF program: programs 1..5 use Set1 entries 0, 1, 3, 4 and 6.
colormapping_mal = {
    f"cNMF_{program}": colorlist[palette_idx]
    for program, palette_idx in zip(range(1, 6), (0, 1, 3, 4, 6))
}
# Grey shades for the two non-program labels.
colormapping_mal.update({"Mixed": "lightgrey", "Outlier": "grey"})

# snRNA-seq

In [None]:
# Refined cell-type label -> high-level compartment, listed as ordered pairs.
_refined_to_highlevel_pairs = (
    ("Hepatocyte", "Epithelial"),
    ("Carcinoma", "Carcinoma"),
    ("Fibroblast", "Fibroblast"),
    ("Quiescent endothelial cells", "Endothelial"),
    ("Smooth muscle", "Muscle"),
    ("Skeletal muscle", "Muscle"),
    ("TAM2", "Myeloid"),
    ("TAM3", "Myeloid"),
    ("TCD4", "Lymphoid"),
    ("Inflammatory CAF", "Fibroblast"),
    ("Adipose CAF", "Fibroblast"),
    ("HGF-CAF", "Fibroblast"),
    ("TAM1", "Myeloid"),
    ("Myeloid-HighMT", "Unknown/technical"),
    ("Angiogenic EC", "Endothelial"),
    ("Quiescent EC", "Endothelial"),
    ("Venous EC", "Endothelial"),
    ("TCD8", "Lymphoid"),
    ("B", "Lymphoid"),
    ("DC", "Myeloid"),
    ("Hepatic EC", "Endothelial"),
    ("Kupffer cells", "Myeloid"),
    ("NK", "Lymphoid"),
    ("Treg", "Lymphoid"),
    ("StrMus-HighMT", "Unknown/technical"),
    ("T-HighMT", "Unknown/technical"),
    ("Mast", "Myeloid"),
    ("Adipocytes", "Stromal/Muscle"),
    ("Endo-HighMT", "Unknown/technical"),
)
highlevel_refined = dict(_refined_to_highlevel_pairs)

# Attach the refined annotations to the cell metadata. The guard makes the
# cell safe to re-run: concatenating a second time would add a duplicate
# "refined_annotations" column and break the attribute access below.
# NOTE(review): if the loaded file already ships a "refined_annotations"
# column, that existing column is kept as is — confirm this is acceptable.
if "refined_annotations" not in adata.obs.columns:
    adata.obs = pd.concat([adata.obs,refined_annotations],axis=1)
# Collapse the refined labels into high-level compartments; labels that are
# not keys of highlevel_refined are left unchanged by `replace`.
adata.obs["highlevel_refined"] = adata.obs.refined_annotations.replace(highlevel_refined)

In [None]:
# Run scib's cell-cycle scoring on adata with the organism set to human.
scib.preprocessing.score_cell_cycle(adata, organism="human")

In [None]:
# Export each cell's refined annotation together with its sample id.
adata.obs[["refined_annotations","sample_id"]].to_csv("/add/path/here/refined_annotations_wsampleid.csv")

## Patient-level distributions

In [None]:
# Per-sample composition of high-level cell types, as fractions of the sample.
patlevel_counts = adata.obs[["sample_id","highlevel_refined"]].groupby(by="sample_id").value_counts(normalize=True)
# Scale to percent first and round afterwards. Rounding to two decimals and
# then multiplying by 100 leaves values such as 28.999999999999996, which the
# integer cast below would truncate to 28.
patlevel_counts = (patlevel_counts*100).round()

# One row per sample, one column per cell type.
df = patlevel_counts.unstack(level=-1)

# Keep the samples listed in `clinical`, in the order of its index.
# NOTE(review): `clinical` is not defined in the visible part of this
# notebook — it must be loaded before this cell runs.
df = df.loc[clinical.index.intersection(df.index)].fillna(0).astype(int)

# One colour per high-level cell type, plus a neutral colour for "NA".
colorlist = sns.color_palette("colorblind", 10)
ctlist = adata.obs.highlevel_refined.unique()
colormapping = {ct: colorlist[i] for i,ct in enumerate(ctlist)}
colormapping["NA"] = "whitesmoke"

In [None]:
# Relabel the rows from sample identifiers to patient labels (patient_id_mapping).
df = df.rename(index=patient_id_mapping)

In [None]:
def add_clinical_info(ax, y=100, h=10,
                      groups=(("NAT", 0, 1), ("Primary", 2, 4), ("Metastatic", 5, 9)),
                      col='k'):
    """Draw a labelled bracket above each group of bars.

    Parameters
    ----------
    ax : object exposing ``plot`` and ``text`` (a matplotlib Axes)
        Axes that receives the brackets.
    y : number, default 100
        Height at which the bracket legs start.
    h : number, default 10
        Height of the bracket; the label is written at ``y + 1.3*h``.
    groups : iterable of (label, first, last), default NAT/Primary/Metastatic
        Label and x positions of the first and last bar spanned by each
        bracket. The defaults assume the bars are ordered NAT, primary,
        metastatic — confirm against the row order of the plotted table.
    col : colour, default 'k'
        Colour of the bracket lines and labels.

    Returns
    -------
    The ``ax`` that was passed in.
    """
    for label, x1, x2 in groups:
        # Bracket: slanted leg up, flat top over the group, slanted leg down.
        ax.plot([x1-0.4, x1-0.3, x2+0.3, x2+0.4], [y, y+h, y+h, y], lw=1.5, c=col)
        # Label centred over the group, slightly above the bracket.
        ax.text((x1+x2)*.5, y+1.3*h, label, ha='center', va='bottom', color=col)

    return ax

In [None]:
# Stacked bar plot of the per-patient composition (whole percent).
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 4))
df.plot.bar(stacked=True, color=colormapping, ax=ax)
ax.legend(bbox_to_anchor=(1.05, 1), frameon=False)
ax.spines[['right', 'top']].set_visible(False)
ax.set_xlabel("")
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")

# Write the value inside every segment that is at least 5 high; smaller
# segments get an empty label.
for segment_group in ax.containers:
    segment_labels = []
    for bar in segment_group:
        bar_height = bar.get_height()
        segment_labels.append(int(bar_height) if bar_height >= 5 else '')
    ax.bar_label(segment_group, labels=segment_labels, label_type='center', fmt='%0.0f', color="white")

ax = add_clinical_info(ax)

for figure_path in ("figures/barplot_pat_tme_highlevel_scaled.png",
                    "figures/barplot_pat_tme_highlevel_scaled.svg"):
    fig.savefig(figure_path, dpi=300, bbox_inches="tight")

In [None]:
# Absolute number of cells per high-level cell type within each sample.
patlevel_counts = (
    adata.obs[["sample_id", "highlevel_refined"]]
    .groupby(by="sample_id")
    .value_counts()
)

# One row per sample and one column per cell type, restricted to the samples
# listed in `clinical` (in the order of its index).
# NOTE(review): `clinical` is not defined in the visible part of this notebook.
df = patlevel_counts.unstack(level=-1)
df = df.loc[clinical.index.intersection(df.index)]

# One colour per high-level cell type, plus a neutral colour for "NA".
colorlist = sns.color_palette("colorblind", 10)
ctlist = adata.obs.highlevel_refined.unique()
colormapping = dict((ct, colorlist[position]) for position, ct in enumerate(ctlist))
colormapping["NA"] = "whitesmoke"

In [None]:
# Relabel the rows from sample identifiers to patient labels (patient_id_mapping).
df = df.rename(index=patient_id_mapping)

In [None]:
def add_clinical_info(ax, y=16000, h=1000,
                      groups=(("NAT", 0, 1), ("Primary", 2, 4), ("Metastatic", 5, 9)),
                      col='k'):
    """Draw a labelled bracket above each group of bars (cell-count scale).

    Parameters
    ----------
    ax : object exposing ``plot`` and ``text`` (a matplotlib Axes)
        Axes that receives the brackets.
    y : number, default 16000
        Height at which the bracket legs start.
    h : number, default 1000
        Height of the bracket; the label is written at ``y + 1.3*h``.
    groups : iterable of (label, first, last), default NAT/Primary/Metastatic
        Label and x positions of the first and last bar spanned by each
        bracket. The defaults assume the bars are ordered NAT, primary,
        metastatic — confirm against the row order of the plotted table.
    col : colour, default 'k'
        Colour of the bracket lines and labels.

    Returns
    -------
    The ``ax`` that was passed in.
    """
    for label, x1, x2 in groups:
        # Bracket: slanted leg up, flat top over the group, slanted leg down.
        ax.plot([x1-0.4, x1-0.3, x2+0.3, x2+0.4], [y, y+h, y+h, y], lw=1.5, c=col)
        # Label centred over the group, slightly above the bracket.
        ax.text((x1+x2)*.5, y+1.3*h, label, ha='center', va='bottom', color=col)

    return ax

In [None]:
# Stacked bar plot of the per-patient cell counts.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 4))
df.plot.bar(stacked=True, color=colormapping, ax=ax)
ax.legend(bbox_to_anchor=(1.05, 1), frameon=False)
ax.spines[['right', 'top']].set_visible(False)
ax.set_xlabel("")
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")

ax = add_clinical_info(ax)

for figure_path in ("figures/barplot_pat_tme_highlevel_nonscaled.png",
                    "figures/barplot_pat_tme_highlevel_nonscaled.svg"):
    fig.savefig(figure_path, dpi=300, bbox_inches="tight")

## UMAP viz

In [None]:
# PCA on the full cohort.
sc.tl.pca(adata)

# Harmony integration with sample_id as the batch key (at most 20 Harmony iterations).
sc.external.pp.harmony_integrate(adata, key="sample_id", max_iter_harmony=20)

# Neighbour graph built on the "X_pca_harmony" representation.
sc.pp.neighbors(adata, use_rep="X_pca_harmony")

# UMAP embedding computed from that neighbour graph.
sc.tl.umap(adata)

In [None]:
# UMAP coloured by high-level cell type, with `colormapping` as the palette.
# NOTE(review): this file name says "scRNA" while the other RNA UMAP outputs
# say "snRNA" — confirm which spelling is intended.
fig = sc.pl.umap(adata, color=["highlevel_refined"], palette=colormapping, frameon=False, ncols=1, return_fig=True)
fig.savefig("figures/highlevel_refined_scRNA_umap.png", dpi=300, bbox_inches="tight")

In [None]:
# Display the number of cells per high-level cell type.
adata.obs.highlevel_refined.value_counts()

In [None]:
# UMAP coloured by sample, using the per-sample colours of colormapping_pat.
fig = sc.pl.umap(adata, color=["sample_id"], frameon=False, ncols=1, return_fig=True, palette=colormapping_pat)
fig.savefig("figures/highlevel_refined_snRNA_umap_sampleid.png", dpi=300, bbox_inches="tight")

In [None]:
# UMAP coloured by the refined (low-level) annotation; no explicit palette is passed.
fig = sc.pl.umap(adata, color=["refined_annotations"], frameon=False, ncols=1, return_fig=True)
fig.savefig("figures/lowlevel_refined_snRNA_umap.png", dpi=300, bbox_inches="tight")

In [None]:
# Load one table per cNMF program (programs "1" to "5"), indexed by its first
# CSV column.
marker_genes = {}
for cl in ["1","2","3","4","5"]:
    marker_genes[cl] = pd.read_csv(f"/add/path/here/cNMF_{cl}.csv",index_col=0)

# Score every cell for each program with the index labels of the first 100
# rows of its table; the result is stored in adata.obs as "cNMF_<n>_score".
# `Index.tolist()` replaces the deprecated `Index.ravel()`.
for prog in marker_genes:
    sc.tl.score_genes(adata=adata,
                      gene_list=marker_genes[prog].head(100).index.tolist(),
                      score_name=f"cNMF_{prog}_score")
    

In [None]:
# Export the five cNMF scores with the sample id and high-level cell type of every cell.
adata.obs[[f"cNMF_{i}_score" for i in range(1,6)]+["sample_id","highlevel_refined"]].to_csv("/add/path/here/adata_cNMF_scores.csv")

In [None]:
# Scores plus sample / cell-type labels for every cell.
score_annotations = adata.obs[[f"cNMF_{i}_score" for i in range(1,6)]+["sample_id","highlevel_refined"]].copy()
# Work on an object-dtype copy of the labels: if the column has been turned
# into a categorical, writing labels that are not existing categories
# ("cNMF_1", ...) into it would raise.
score_annotations["highlevel_wtop"] = score_annotations["highlevel_refined"].astype(object)

# Only carcinoma cells can be assigned to a program.
df = score_annotations[score_annotations["highlevel_refined"]=="Carcinoma"].copy()

# Cells strictly above the 95th percentile of each score (among carcinoma cells).
top_cells = {}
for state in [f"cNMF_{i}_score" for i in range(1,6)]:
    top_cells[state] = df[df[state]>df[state].quantile(0.95)].index

# Keep only the cells that are top-scoring for exactly one program.
vc = pd.Series(np.hstack(list(top_cells.values()))).value_counts()
to_keep = vc[vc==1].index

for state in top_cells:
    top_cells[state] = top_cells[state].intersection(to_keep)

# Label those cells with the program name (the score column name without "_score").
for state in top_cells:
    score_annotations.loc[top_cells[state],"highlevel_wtop"] = state.removesuffix("_score")

score_annotations.to_csv("/add/path/here/adata_cNMF_scores_wtop.csv")

In [None]:
# One panel per cNMF score: carcinoma cells grouped by the program they are
# top-scoring for ("Carcinoma" = not assigned to any program).
fig, axs = plt.subplots(3,2, figsize=(10,10))
flatax = axs.flatten()
for i,ax in enumerate(flatax[:-1]):
    sns.boxplot(data=score_annotations[score_annotations["highlevel_refined"]=="Carcinoma"],
                x="highlevel_wtop", y=f"cNMF_{i+1}_score", palette=(colormapping | colormapping_mal),
                ax=ax, order=["cNMF_1","cNMF_2","cNMF_3","cNMF_4","cNMF_5","Carcinoma"])
    ax.spines[['right', 'top']].set_visible(False)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
    # Dashed reference line at a score of zero across the whole panel.
    ax.hlines(y=0, xmin=ax.get_xlim()[0], xmax=ax.get_xlim()[1], linestyles="dashed", color="grey")
    ax.set_xlabel("")
# The sixth panel of the 3x2 grid is unused.
flatax[-1].axis("off")
fig.tight_layout()
# Distinct file name: the per-cell-type box plot is saved as
# "boxplot_cNMF_score_per_celltype.png", and sharing that name would let the
# later figure overwrite this one.
fig.savefig("figures/malignant/boxplot_cNMF_score_per_topstate.png", dpi=250, bbox_inches="tight")

In [None]:
# One panel per cNMF score: all cells grouped by high-level cell type.
fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(10, 10))
flatax = axs.flatten()
for program_idx, panel in enumerate(flatax[:-1], start=1):
    sns.boxplot(
        data=score_annotations,
        x="highlevel_refined",
        y=f"cNMF_{program_idx}_score",
        palette=colormapping,
        ax=panel,
    )
    panel.spines[['right', 'top']].set_visible(False)
    panel.set_xticklabels(panel.get_xticklabels(), rotation=45, ha="right")
    # Dashed reference line at a score of zero across the whole panel.
    x_lo, x_hi = panel.get_xlim()
    panel.hlines(y=0, xmin=x_lo, xmax=x_hi, linestyles="dashed", color="grey")
    panel.set_xlabel("")
# The sixth panel of the 3x2 grid is unused.
flatax[-1].axis("off")
fig.tight_layout()
fig.savefig("figures/malignant/boxplot_cNMF_score_per_celltype.png", dpi=250, bbox_inches="tight")

## Subset of malignant cells

In [None]:
# Malignant subset: cells annotated "Carcinoma" whose CNV_celltype_annotation is also "Tumor".
subadata = adata[(adata.obs.highlevel_refined=="Carcinoma") & (adata.obs.CNV_celltype_annotation=="Tumor")].copy()

In [None]:
# Embedding of the malignant subset without Harmony: PCA, neighbour graph, UMAP.
sc.tl.pca(subadata)
sc.pp.neighbors(subadata)
sc.tl.umap(subadata)

In [None]:
# UMAP of the malignant subset (no Harmony), coloured by each cNMF score and by "pid".
# NOTE(review): no visible cell adds a "pid" column to the RNA object —
# presumably it is already stored in the loaded file; confirm.
fig = sc.pl.umap(subadata, color=["cNMF_1_score","cNMF_2_score","cNMF_3_score","cNMF_4_score","cNMF_5_score","pid"], 
                 palette=colormapping_pat_bis, frameon=False, ncols=2, return_fig=True)
# Retitle every second axes — presumably the axes in between are the
# colorbars of the score panels (TODO confirm).
for i in range(5):
    fig.axes[2*i].set_title(f"cNMF$_{i+1}$ score")
fig.axes[-1].set_title("Patient ID")
fig.savefig("figures/malonly_unintegrated_cNMF_score_snRNA_umap.png", dpi=300, bbox_inches="tight")

In [None]:
# Embedding of the malignant subset with Harmony: PCA, Harmony on "X_pca"
# with sample_id as the batch key, neighbour graph on "X_pca_harmony", UMAP.
sc.tl.pca(subadata)
sc.external.pp.harmony_integrate(subadata, key="sample_id", basis="X_pca", max_iter_harmony=20)
sc.pp.neighbors(subadata, use_rep="X_pca_harmony")
sc.tl.umap(subadata)

In [None]:
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as mcolors
# First two columns of the "X_pca_harmony" embedding, one row per malignant cell.
harmony_pcs = pd.DataFrame(
    subadata.obsm["X_pca_harmony"][:, :2],
    index=subadata.obs_names,
    columns=["PC1", "PC2"],
)

# Append the five cNMF scores and rename them to math-text labels
# ("cNMF$_{1}$" ... "cNMF$_{5}$").
score_columns = [f"cNMF_{k}_score" for k in range(1, 6)]
X_pca = pd.concat([harmony_pcs, subadata.obs[score_columns]], axis=1)
X_pca.columns = ["PC1", "PC2"] + [f"cNMF$_{{{k}}}$" for k in range(1, 6)]

def plot_pcs_color(ax, state):
    """Scatter PC1 against PC2 of ``X_pca``, coloured by one of its columns.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on; the colorbar is attached to the figure owning it.
    state : str
        Name of the ``X_pca`` column used for colouring; also used as title.

    Reads the module-level ``X_pca`` table and finishes by applying
    ``pretty_ax`` to the axes.
    """
    scores = X_pca[state]

    # Diverging colour scale spanning the full score range, centred on the
    # midpoint between the first and third quartiles.
    vmin, vmax = scores.min(), scores.max()
    vcenter = (scores.quantile(0.75) + scores.quantile(0.25))/2
    normalize = mcolors.TwoSlopeNorm(vcenter=vcenter, vmin=vmin, vmax=vmax)
    colormap = matplotlib.colormaps['RdBu_r']
    sns.scatterplot(
        y=X_pca["PC2"],
        x=X_pca["PC1"],
        c=scores,
        s=1,
        norm=normalize,
        cmap=colormap,
        ax=ax
    )
    # Stand-alone mappable carrying the same norm and colormap for the colorbar.
    scalar_mappable = cm.ScalarMappable(norm=normalize, cmap=colormap)
    scalar_mappable.set_array(scores)
    ax.set_title(state)
    # Use the figure that owns `ax` instead of a global `fig`, so the function
    # does not depend on a variable created after its definition.
    ax.figure.colorbar(scalar_mappable, ax=ax)
    pretty_ax(ax)

# One panel per cNMF program, laid out in a single row.
fig, ax = plt.subplots(nrows=1, ncols=5, figsize=(22.5, 3))
flatax = ax.flatten()

for panel, program in zip(flatax, range(1, 6)):
    plot_pcs_color(panel, f"cNMF$_{{{program}}}$")

fig.tight_layout()
fig.savefig("figures/malonly_pca_harmony.png", dpi=200, bbox_inches="tight")

In [None]:
# UMAP of the malignant subset after Harmony, coloured by each cNMF score and by "pid".
fig = sc.pl.umap(subadata, color=["cNMF_1_score","cNMF_2_score","cNMF_3_score","cNMF_4_score","cNMF_5_score","pid"], 
                 palette=colormapping_pat_bis, frameon=False, ncols=2, return_fig=True)
# Retitle every second axes — presumably the axes in between are the
# colorbars of the score panels (TODO confirm).
for i in range(5):
    fig.axes[2*i].set_title(f"cNMF$_{i+1}$ score")
fig.axes[-1].set_title("Patient ID")
fig.savefig("figures/malonly_cNMF_score_snRNA_umap.png", dpi=300, bbox_inches="tight")

In [None]:
# Display the pairwise correlation matrix of the five cNMF scores in the malignant subset.
subadata.obs[["cNMF_1_score","cNMF_2_score","cNMF_3_score","cNMF_4_score","cNMF_5_score"]].corr()

In [None]:
from statannotations.Annotator import Annotator

# Compare every cNMF score between primary and metastatic malignant cells.
df = subadata.obs.copy()
df["Metastatic"] = df["Metastatic"].replace({True: "Metastatic", False: "Primary"})
fig, ax = plt.subplots(3,2, figsize=(8,8))
flatax = ax.flatten()

pairs = [("Metastatic", "Primary")]
for i,state in enumerate(["cNMF_1_score","cNMF_2_score","cNMF_3_score","cNMF_4_score","cNMF_5_score"]):

    # Difference of the group medians (metastatic minus primary).
    medians = df[["Metastatic",state]].groupby("Metastatic").median()
    diff = (medians.loc["Metastatic"] - medians.loc["Primary"]).values[0]
    # Transparent box plot drawn together with a violin plot on the same axes.
    sns.boxplot(data=df, x="Metastatic",y=state,ax=flatax[i], order=["Primary", "Metastatic"],
            showcaps=True,boxprops={'facecolor':'None'},
             showfliers=False,whiskerprops={'linewidth':1}, zorder=20)
    sns.violinplot(data=df, x="Metastatic",y=state, order=["Primary", "Metastatic"],
                   ax=flatax[i],inner="box", zorder=0)
    flatax[i].spines[["bottom","left"]].set_linewidth(4)
    flatax[i].spines[["top","right"]].set_visible(False)
    flatax[i].set_xticklabels(flatax[i].get_xticklabels(),rotation=45,ha="right")
    flatax[i].set_xlabel("")
    flatax[i].set_ylabel(f"cNMF$_{i+1}$ score")
    # Raw f-string: "\D" is not a valid escape sequence in a normal string
    # literal and triggers a warning on recent Python versions.
    flatax[i].text(-0.4, flatax[i].get_ylim()[1], rf'$\Delta$ = {diff:.2f}')

    # Mann-Whitney test between the two groups, shown as significance stars.
    # NOTE(review): each Annotator receives a single pair, so the "BH"
    # correction is applied per panel — confirm whether a correction across
    # the five programs was intended.
    annot = Annotator(
        flatax[i], pairs,
        data=df, x="Metastatic",y=state, order=["Primary", "Metastatic"],
    )
    annot.configure(
        test="Mann-Whitney",
        loc="inside",
        text_format="star",
        show_test_name=False,
        verbose=2,
        comparisons_correction="BH",
        correction_format="replace",
    )
    annot.apply_test()
    flatax[i], _ = annot.annotate()
# The sixth panel of the 3x2 grid is unused.
flatax[-1].axis("off")
fig.tight_layout()
fig.savefig("figures/malonly_cNMF_score_metastasis_link.svg", dpi=200, bbox_inches="tight")

In [None]:
from statannotations.Annotator import Annotator

# Compare every cNMF score between treatment-naïve and treated malignant cells.
df = subadata.obs.copy()
df["TreatmentNaive"] = df["TreatmentNaive"].replace({True: "Naïve", False: "Treated"})
fig, ax = plt.subplots(3,2, figsize=(8,8))
flatax = ax.flatten()

pairs = [("Naïve", "Treated")]
for i,state in enumerate(["cNMF_1_score","cNMF_2_score","cNMF_3_score","cNMF_4_score","cNMF_5_score"]):

    # Difference of the group medians (treated minus naïve).
    medians = df[["TreatmentNaive",state]].groupby("TreatmentNaive").median()
    diff = (medians.loc["Treated"] - medians.loc["Naïve"]).values[0]
    # Transparent box plot drawn together with a violin plot on the same axes.
    sns.boxplot(data=df, x="TreatmentNaive",y=state,ax=flatax[i], order=["Naïve", "Treated"],
            showcaps=True,boxprops={'facecolor':'None'},
             showfliers=False,whiskerprops={'linewidth':1}, zorder=20)
    sns.violinplot(data=df, x="TreatmentNaive",y=state, order=["Naïve", "Treated"],
                   ax=flatax[i],inner="box", zorder=0)
    flatax[i].spines[["bottom","left"]].set_linewidth(4)
    flatax[i].spines[["top","right"]].set_visible(False)
    flatax[i].set_xticklabels(flatax[i].get_xticklabels(),rotation=45,ha="right")
    flatax[i].set_xlabel("")
    flatax[i].set_ylabel(f"cNMF$_{i+1}$ score")
    # Raw f-string: "\D" is not a valid escape sequence in a normal string
    # literal and triggers a warning on recent Python versions.
    flatax[i].text(-0.4, flatax[i].get_ylim()[1], rf'$\Delta$ = {diff:.2f}')

    # Mann-Whitney test between the two groups, shown as significance stars.
    # NOTE(review): each Annotator receives a single pair, so the "BH"
    # correction is applied per panel — confirm whether a correction across
    # the five programs was intended.
    annot = Annotator(
        flatax[i], pairs,
        data=df, x="TreatmentNaive",y=state, order=["Naïve", "Treated"],
    )
    annot.configure(
        test="Mann-Whitney",
        loc="inside",
        text_format="star",
        show_test_name=False,
        verbose=2,
        comparisons_correction="BH",
        correction_format="replace",
    )
    annot.apply_test()
    flatax[i], _ = annot.annotate()
# The sixth panel of the 3x2 grid is unused.
flatax[-1].axis("off")
fig.tight_layout()
fig.savefig("figures/malonly_cNMF_score_treatment_naive_link.svg", dpi=200, bbox_inches="tight")

In [None]:
# UMAP of the malignant subset coloured by two QC columns (log1p_total_counts, pct_counts_mt).
fig = sc.pl.umap(subadata, color=["log1p_total_counts","pct_counts_mt"], frameon=False, ncols=1, return_fig=True)
fig.savefig("figures/malonly_technical_harmony_snRNA_umap.png", dpi=300, bbox_inches="tight")

# snATAC-seq

In [None]:
# Load the combined ATAC AnnData object; the placeholder path must be replaced with the real location.
atac = sc.read_h5ad("/add/path/here/combined_atac.h5ad")

In [None]:
# Transfer the RNA-derived cNMF scores onto the ATAC cells, sample by sample,
# by matching cell names.
adata_cnmf_scores = pd.read_csv("/add/path/here/adata_cNMF_scores.csv",index_col=0)

new_annot = []
# NOTE(review): the loop iterates over the `dataset` values but filters both
# tables on `sample_id` — this assumes both columns hold the same identifiers.
for sample in atac.obs.dataset.unique():

    # RNA scores of this sample; the last two characters of every cell name
    # are dropped (presumably a suffix absent from the ATAC names — TODO confirm).
    df1 = adata_cnmf_scores[adata_cnmf_scores.sample_id==sample].copy()
    df1.index = df1.index.str[:-2]
    
    # ATAC cells of this sample; the part before the first "_" of every cell
    # name is dropped. np.hstack flattens the remaining pieces, which only
    # keeps one entry per cell if each name contains exactly one "_" (TODO confirm).
    df2 = atac.obs[atac.obs.sample_id==sample].copy()
    raw_idx = df2.index.copy()
    df2.index = np.hstack(df2.index.str.split("_").str[1:])
    
    # Shortened ATAC name -> original ATAC name, used to restore the index below.
    dict_map = {df2.index[i]: raw_idx[i] for i in range(len(raw_idx))}
    
    # Scores of the cells found in both tables. Concatenating with an ATAC
    # column extends the rows to every ATAC cell of the sample (missing scores
    # become NaN); that helper column is then dropped by .iloc[:, :-1].
    df = pd.concat([df1.loc[df2.index.intersection(df1.index),['cNMF_1_score', 'cNMF_2_score',
       'cNMF_3_score', 'cNMF_4_score', 'cNMF_5_score']],df2.refined_annotation],axis=1).iloc[:,:-1]
    
    df = df.rename(index=dict_map)
    new_annot.append(df)
new_annot = pd.concat(new_annot)

# Append the five score columns to the ATAC cell metadata.
atac.obs = pd.concat([atac.obs,new_annot],axis=1)

In [None]:
# Patient label of every ATAC cell, derived from its sample id via patient_id_mapping.
atac.obs["pid"] = atac.obs.sample_id.replace(patient_id_mapping)

In [None]:
# High-level compartment of every ATAC cell, using the same mapping as for the RNA data.
atac.obs["highlevel_annotation"] = atac.obs.refined_annotation.replace(highlevel_refined)

In [None]:
# Keep the first 40 LSI components as the reduced representation. The key
# must be "X_lsi_red": that is the basis name Harmony is asked to integrate
# (the previous "X_lsi_rd" spelling left that basis undefined).
atac.obsm["X_lsi_red"] = atac.obsm["X_lsi"][:,:40]

In [None]:
# Harmony integration of the "X_lsi_red" basis with sample_id as the batch key (at most 20 Harmony iterations).
sc.external.pp.harmony_integrate(atac, key="sample_id", basis="X_lsi_red", max_iter_harmony=20)

In [None]:
# Neighbour graph built on the "X_pca_harmony" representation.
sc.pp.neighbors(atac, use_rep="X_pca_harmony")

In [None]:
# UMAP embedding of the ATAC cells from the neighbour graph.
sc.tl.umap(atac)

In [None]:
# ATAC UMAP coloured by high-level annotation, with `colormapping` as the palette.
fig = sc.pl.umap(atac, color=["highlevel_annotation"], palette=colormapping, frameon=False, ncols=1, return_fig=True)
fig.savefig("figures/highlevel_refined_snATAC_umap.png", dpi=300, bbox_inches="tight")

In [None]:
# ATAC UMAP coloured by the `dataset` column, using the per-sample colours of colormapping_pat.
fig = sc.pl.umap(atac, color=["dataset"], frameon=False, ncols=1, palette=colormapping_pat, return_fig=True)
fig.savefig("figures/highlevel_refined_snATAC_umap_sampleid.png", dpi=300, bbox_inches="tight")

## Patient-level distributions

In [None]:
# Per-sample composition of high-level cell types in the ATAC data, as
# fractions of the sample.
patlevel_counts = atac.obs[["sample_id","highlevel_annotation"]].groupby(by="sample_id").value_counts(normalize=True)
# Scale to percent first and round afterwards. Rounding to two decimals and
# then multiplying by 100 leaves values such as 28.999999999999996, which the
# integer cast below would truncate to 28.
patlevel_counts = (patlevel_counts*100).round()

# One row per sample, one column per cell type.
df = patlevel_counts.unstack(level=-1)

# Keep the samples listed in `clinical`, in the order of its index.
# NOTE(review): `clinical` is not defined in the visible part of this
# notebook — it must be loaded before this cell runs.
df = df.loc[clinical.index.intersection(df.index)].fillna(0).astype(int)

# One colour per high-level cell type (taken from the RNA object so both
# modalities share colours), plus a neutral colour for "NA".
colorlist = sns.color_palette("colorblind", 10)
ctlist = adata.obs.highlevel_refined.unique()
colormapping = {ct: colorlist[i] for i,ct in enumerate(ctlist)}
colormapping["NA"] = "whitesmoke"

In [None]:
def add_clinical_info(ax, y=100, h=10,
                      groups=(("NAT", 0, 0), ("Primary", 1, 3), ("Metastatic", 4, 8)),
                      col='k'):
    """Draw a labelled bracket above each group of bars (ATAC, percent scale).

    Parameters
    ----------
    ax : object exposing ``plot`` and ``text`` (a matplotlib Axes)
        Axes that receives the brackets.
    y : number, default 100
        Height at which the bracket legs start.
    h : number, default 10
        Height of the bracket; the label is written at ``y + 1.3*h``.
    groups : iterable of (label, first, last), default NAT/Primary/Metastatic
        Label and x positions of the first and last bar spanned by each
        bracket. The defaults assume the bars are ordered NAT, primary,
        metastatic — confirm against the row order of the plotted table.
    col : colour, default 'k'
        Colour of the bracket lines and labels.

    Returns
    -------
    The ``ax`` that was passed in.
    """
    for label, x1, x2 in groups:
        # Bracket: slanted leg up, flat top over the group, slanted leg down.
        ax.plot([x1-0.4, x1-0.3, x2+0.3, x2+0.4], [y, y+h, y+h, y], lw=1.5, c=col)
        # Label centred over the group, slightly above the bracket.
        ax.text((x1+x2)*.5, y+1.3*h, label, ha='center', va='bottom', color=col)

    return ax

In [None]:
# Stacked bar plot of the per-sample ATAC composition (whole percent).
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 4))
df.plot.bar(stacked=True, color=colormapping, ax=ax)
ax.legend(bbox_to_anchor=(1.05, 0.9), frameon=False)
ax.spines[['right', 'top']].set_visible(False)
ax.set_xlabel("")
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")

# Write the value inside every segment that is at least 5 high; smaller
# segments get an empty label.
for segment_group in ax.containers:
    segment_labels = []
    for bar in segment_group:
        bar_height = bar.get_height()
        segment_labels.append(int(bar_height) if bar_height >= 5 else '')
    ax.bar_label(segment_group, labels=segment_labels, label_type='center', fmt='%0.0f', color="white")

ax = add_clinical_info(ax)

for figure_path in ("figures/barplot_pat_tme_ATAC_highlevel_scaled.png",
                    "figures/barplot_pat_tme_ATAC_highlevel_scaled.svg"):
    fig.savefig(figure_path, dpi=300, bbox_inches="tight")

In [None]:
# Absolute number of ATAC cells per high-level cell type within each sample.
patlevel_counts = (
    atac.obs[["sample_id", "highlevel_annotation"]]
    .groupby(by="sample_id")
    .value_counts()
)

# One row per sample and one column per cell type, restricted to the samples
# listed in `clinical` (in the order of its index); missing combinations
# count as zero.
# NOTE(review): `clinical` is not defined in the visible part of this notebook.
df = patlevel_counts.unstack(level=-1)
kept_samples = clinical.index.intersection(df.index)
df = df.loc[kept_samples].fillna(0).astype(int)

# One colour per high-level cell type, plus a neutral colour for "NA".
colorlist = sns.color_palette("colorblind", 10)
ctlist = adata.obs.highlevel_refined.unique()
colormapping = dict((ct, colorlist[position]) for position, ct in enumerate(ctlist))
colormapping["NA"] = "whitesmoke"

In [None]:
def add_clinical_info(ax, y=9000, h=600,
                      groups=(("NAT", 0, 0), ("Primary", 1, 3), ("Metastatic", 4, 8)),
                      col='k'):
    """Draw a labelled bracket above each group of bars (ATAC, count scale).

    Parameters
    ----------
    ax : object exposing ``plot`` and ``text`` (a matplotlib Axes)
        Axes that receives the brackets.
    y : number, default 9000
        Height at which the bracket legs start.
    h : number, default 600
        Height of the bracket; the label is written at ``y + 1.3*h``.
    groups : iterable of (label, first, last), default NAT/Primary/Metastatic
        Label and x positions of the first and last bar spanned by each
        bracket. The defaults assume the bars are ordered NAT, primary,
        metastatic — confirm against the row order of the plotted table.
    col : colour, default 'k'
        Colour of the bracket lines and labels.

    Returns
    -------
    The ``ax`` that was passed in.
    """
    for label, x1, x2 in groups:
        # Bracket: slanted leg up, flat top over the group, slanted leg down.
        ax.plot([x1-0.4, x1-0.3, x2+0.3, x2+0.4], [y, y+h, y+h, y], lw=1.5, c=col)
        # Label centred over the group, slightly above the bracket.
        ax.text((x1+x2)*.5, y+1.3*h, label, ha='center', va='bottom', color=col)

    return ax

In [None]:
# Stacked bar plot of the per-sample ATAC cell counts.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 4))
df.plot.bar(stacked=True, color=colormapping, ax=ax)
ax.legend(bbox_to_anchor=(1.05, 0.9), frameon=False)
ax.spines[['right', 'top']].set_visible(False)
ax.set_xlabel("")
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")

ax = add_clinical_info(ax)

for figure_path in ("figures/barplot_pat_tme_ATAC_highlevel_nonscaled.png",
                    "figures/barplot_pat_tme_ATAC_highlevel_nonscaled.svg"):
    fig.savefig(figure_path, dpi=300, bbox_inches="tight")

## Subset malignant

In [None]:
# Malignant ATAC subset: cells whose high-level annotation is "Carcinoma".
subatac = atac[atac.obs.highlevel_annotation=="Carcinoma"].copy()
# Reduced representation: the first 40 LSI components.
subatac.obsm["X_lsi_red"] = subatac.obsm["X_lsi"][:,:40]

In [None]:
# Embedding without Harmony: neighbour graph on the reduced LSI components, then UMAP.
sc.pp.neighbors(subatac, use_rep="X_lsi_red")
sc.tl.umap(subatac)

In [None]:
# UMAP of the malignant ATAC subset (no Harmony), coloured by each transferred cNMF score and by patient.
fig = sc.pl.umap(subatac, color=["cNMF_1_score","cNMF_2_score","cNMF_3_score","cNMF_4_score","cNMF_5_score","pid"], 
                 palette=colormapping_pat_bis, frameon=False, ncols=2, return_fig=True)
# Retitle every second axes — presumably the axes in between are the
# colorbars of the score panels (TODO confirm).
for i in range(5):
    fig.axes[2*i].set_title(f"cNMF$_{i+1}$ score")
fig.axes[-1].set_title("Patient ID")
fig.savefig("figures/malonly_unintegrated_cNMF_score_snATAC_umap.png", dpi=300, bbox_inches="tight")

In [None]:
# Embedding with Harmony: integrate the "X_lsi_red" basis with sample_id as
# the batch key, build the neighbour graph on "X_pca_harmony", then UMAP.
sc.external.pp.harmony_integrate(subatac, key="sample_id", basis="X_lsi_red", max_iter_harmony=20)
sc.pp.neighbors(subatac, use_rep="X_pca_harmony")
sc.tl.umap(subatac)

In [None]:
# UMAP of the malignant ATAC subset after Harmony, coloured by each transferred cNMF score and by patient.
fig = sc.pl.umap(subatac, color=["cNMF_1_score","cNMF_2_score","cNMF_3_score","cNMF_4_score","cNMF_5_score","pid"], 
                 palette=colormapping_pat_bis, frameon=False, ncols=2, return_fig=True)
# Retitle every second axes — presumably the axes in between are the
# colorbars of the score panels (TODO confirm).
for i in range(5):
    fig.axes[2*i].set_title(f"cNMF$_{i+1}$ score")
fig.axes[-1].set_title("Patient ID")
fig.savefig("figures/malonly_cNMF_score_snATAC_umap.png", dpi=300, bbox_inches="tight")

In [None]:
# UMAP of the malignant ATAC subset coloured by sample, using the per-sample colours of colormapping_pat.
fig = sc.pl.umap(subatac, color=["sample_id"], palette=colormapping_pat, frameon=False, ncols=1, return_fig=True)
fig.savefig("figures/malonly_sampleid_harmony_snATAC_umap.png", dpi=300, bbox_inches="tight")

# Heatmaps of marker genes/regions

## Regions

In [None]:
from statsmodels.stats.multitest import multipletests
from scipy.stats import pearsonr

In [None]:
# Regions whose `percentile` value in subatac.var exceeds 0.25.
highly_expressed_regions = subatac.var[subatac.var.percentile>0.25].index

# Restrict the malignant ATAC subset to those regions.
highatac = subatac[:,highly_expressed_regions].copy()
# Drop the cells that have no cNMF_1 score (NaN).
highatac = highatac[highatac.obs.cNMF_1_score.dropna().index].copy()

In [None]:
# Number of chunks of at most 10000 regions needed to cover all regions.
n_bins = highatac.shape[1]//10000 + (1 if highatac.shape[1]%10000>0 else 0)

# Correlation of every region with every cNMF score, collected chunk by chunk.
all_corrs = {state: [] for state in ["cNMF_1","cNMF_2","cNMF_3","cNMF_4","cNMF_5"]}
for i in tqdm(range(n_bins)):
    # Densify each chunk once and reuse it for all five scores; the chunk does
    # not depend on the score, so rebuilding it per score was redundant work.
    Xarray = pd.DataFrame(highatac.X[:,i*10000:(i+1)*10000].toarray(),
                          index=highatac.obs_names,
                          columns=highatac.var_names[i*10000:(i+1)*10000])
    for state in all_corrs:
        all_corrs[state].append(Xarray.corrwith(highatac.obs[f"{state}_score"]))

In [None]:
# Number of chunks of at most 10000 regions needed to cover all regions.
n_bins = highatac.shape[1]//10000 + (1 if highatac.shape[1]%10000>0 else 0)

# p-value of the Pearson correlation of every region with every cNMF score,
# collected chunk by chunk.
all_ps = {state: [] for state in ["cNMF_1","cNMF_2","cNMF_3","cNMF_4","cNMF_5"]}
for i in tqdm(range(n_bins)):
    # Densify each chunk once and reuse it for all five scores; the chunk does
    # not depend on the score, so rebuilding it per score was redundant work.
    Xarray = pd.DataFrame(highatac.X[:,i*10000:(i+1)*10000].toarray(),
                          index=highatac.obs_names,
                          columns=highatac.var_names[i*10000:(i+1)*10000])
    for state in all_ps:
        # pearsonr returns (statistic, p-value); keep the p-value.
        all_ps[state].append(Xarray.corrwith(highatac.obs[f"{state}_score"], method=lambda x, y: pearsonr(x, y)[1]))

In [None]:
most_corr_dir = pl.Path("/add/path/here/")

# Stitch the per-chunk results into one series per program and write them out.
all_corrs = {state: pd.concat(chunks) for state, chunks in all_corrs.items()}
for state, corr_series in all_corrs.items():
    corr_series.to_csv(most_corr_dir / f"{state}_region_correlation.csv")

all_ps = {state: pd.concat(chunks) for state, chunks in all_ps.items()}
for state, pval_series in all_ps.items():
    pval_series.to_csv(most_corr_dir / f"{state}_region_pval.csv")

# The 100 most positively correlated regions of every program.
dar_regions = {
    state: corr_series.sort_values(ascending=False).head(100)
    for state, corr_series in all_corrs.items()
}

In [None]:
# Number of regions per program with correlation > 0.1 and adjusted p < 0.05.
# NOTE(review): multipletests is called with its default method — confirm
# that this is the intended correction for the column named "q".
n_regions = {}
for state, corr_series in all_corrs.items():
    pval_series = all_ps[state]
    statedf = pd.concat([corr_series, pval_series], axis=1)
    statedf.columns = ["Correlation", "p"]
    statedf["q"] = multipletests(pval_series.values.ravel())[1]

    is_linked = (statedf["Correlation"] > 0.1) & (statedf["q"] < 0.05)
    # Stored as a one-element list so the dictionary converts to a one-row table.
    n_regions[state] = [int(is_linked.sum())]

In [None]:
# Colours of the five cNMF programs, in program order.
# NOTE(review): `color` is not used by any later visible cell.
color=[colormapping_mal[f"cNMF_{i}"] for i in range(1,6)]

In [None]:
# Bar plot of the per-program region counts stored in n_regions.
fig, ax = plt.subplots(1,1,figsize=(4,2))
sns.barplot(data=pd.DataFrame(n_regions),ax=ax, palette=colormapping_mal)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
pretty_ax_wlabels(ax)
# Print the bar heights above the bars of the last bar container.
ax.bar_label(ax.containers[-1], fmt='%.0f', label_type='edge')
# Replace the tick labels with math-text program names.
ax.set_xticks(ax.get_xticks(), ["cNMF$_{1}$","cNMF$_{2}$","cNMF$_{3}$","cNMF$_{4}$","cNMF$_{5}$"])
fig.savefig("figures/malonly_link_regions_cnmf_scores.svg", dpi=200, bbox_inches="tight")