In [None]:
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

import pathlib as pl

from tqdm.notebook import tqdm

import palettable

# Load data

In [None]:
# Load the full-cohort AnnData object from disk.
# The path is a placeholder and must be replaced with the local location of the .h5ad file.
adata = sc.read_h5ad("/add/path/here/full_cohort.h5ad")

In [None]:
# Clinical metadata table; the first CSV column becomes the index.
# The path is a placeholder and must be replaced with the local file location.
clinical = pd.read_csv("/add/path/here/EAC_clinical_info.csv",index_col=0)

# NOTE(review): treatment_mapping is not referenced anywhere in the visible part
# of the notebook; kept in case a later cell relies on it.
treatment_mapping = {"Neoadjuvant CROSS": "Neoadj. chemo", "Neoadjuvent carboplatin": "Neoadj. chemo"}

# A sample is flagged metastatic when it is tumour tissue and its site mentions "metastasis".
# Whitespace is stripped before comparing so that both "Yes " (as previously matched)
# and "Yes" count as tumour; na=False treats a missing site as non-metastatic.
metastatic = (clinical["Tumor?"].str.strip() == "Yes") & clinical["Site"].str.contains("metastasis", na=False)
metastatic.name = "Metastatic?"
clinical["Metastatic?"] = metastatic

# Coarse anatomical location derived from the free-text site.
clinical["Location"] = clinical["Site"].replace({"GEJ": "Esophagus/GEJ",
                                                 "Esophagus": "Esophagus/GEJ"})
# Use .loc for the masked assignment: the chained form clinical["Location"][mask] = ...
# may write to a temporary copy (SettingWithCopyWarning) and does nothing under copy-on-write.
for pattern, label in [("Liver", "Liver"),
                       ("Adrenal", "Adrenal gland"),
                       ("Peritoneal", "Peritoneum")]:
    clinical.loc[clinical["Location"].str.contains(pattern, na=False), "Location"] = label

# Collapse the free-text grade/stage descriptions into stage labels.
clinical["Stage"] = clinical["Grade/stage"].replace({"Stage IV ": "IV", "Stage IV": "IV",
                                                     "Moderately differentiated; ypT1aN0": "I",
                                                     "Moderately differentiated; pT1aN0": "I",
                                                     "Poorly differentiated; ypT2N0": "II",
                                                     "Presented with stage III became stage IV during esophagectomy when pleural metastases were identified": "III/IV"})

# Treatment labels are assigned by position, i.e. in the row order of the CSV file.
# NOTE(review): this silently mislabels samples if the CSV rows are reordered;
# a mapping keyed by sample id would be safer — confirm the order against the file.
clinical["Treatment"] = ["Neoadj. chemo",
                         "None",
                         "Neoadj. chemo + ICI + RT",
                         "None",
                         "None",
                         "Chemo + HER2 targeted + ICI",
                         "Neoadj. chemo + HER2 targeted",
                         "Neoadj. chemo + ICI",
                         "None",
                         "Neoadj. chemo + VEGFR2i"]

clinical["HER2 status"] = clinical["HER2"].replace({"HER 2 1+": "1+/equivocal"})

# Order samples by tumour status, then metastatic status, then location;
# the composition tables further down follow this row order.
clinical = clinical.sort_values(by=["Tumor?","Metastatic?","Location"])

# PD-L1 scores are also positional, but in the *sorted* row order established just above.
# NOTE(review): verify these values against the sorted index.
clinical["PD-L1 CPS score"] = [0,2,7,2,3,24,0,8,3,15]

In [None]:
# Refined per-cell annotations: the first CSV column is the index and the single
# remaining column is renamed to "refined_annotations".
refined_annotations = pd.read_csv(
    "/add/path/here/refined_annotations.csv", index_col=0
).set_axis(["refined_annotations"], axis=1)

In [None]:
# Refined cell-type labels grouped by the high-level compartment they belong to.
_lineage_members = {
    "Epithelial": ["Hepatocyte"],
    "Carcinoma": ["Carcinoma"],
    "Fibroblast": ["Fibroblast", "Inflammatory CAF", "Adipose CAF", "HGF-CAF"],
    "Endothelial": ["Quiescent endothelial cells", "Angiogenic EC", "Quiescent EC",
                    "Venous EC", "Hepatic EC"],
    "Muscle": ["Smooth muscle", "Skeletal muscle"],
    "Stromal/Muscle": ["Adipocytes"],
    "Myeloid": ["TAM2", "TAM3", "TAM1", "DC", "Kupffer cells", "Mast"],
    "Lymphoid": ["TCD4", "TCD8", "B", "NK", "Treg"],
    "Unknown/technical": ["Myeloid-HighMT", "StrMus-HighMT", "T-HighMT", "Endo-HighMT"],
}

# Flat lookup {refined label -> high-level compartment}, used with Series.replace below.
highlevel_refined = {
    fine_label: lineage
    for lineage, fine_labels in _lineage_members.items()
    for fine_label in fine_labels
}

In [None]:
# Attach the refined labels as a column aligned on adata's own cell index.
# Unlike pd.concat(axis=1), which is an outer join, this never adds rows for
# barcodes missing from adata, and re-running the cell overwrites the column
# instead of creating a duplicate one. Cells without an annotation get NaN.
adata.obs["refined_annotations"] = refined_annotations["refined_annotations"].reindex(adata.obs_names)

# Coarse compartment per cell; labels absent from highlevel_refined are left unchanged.
adata.obs["highlevel_refined"] = adata.obs["refined_annotations"].replace(highlevel_refined)

In [None]:
# One fixed colour per sample: the eight Dark2 colours first, then the first
# two colours of the Paired palette for the remaining two samples.
colorlist = palettable.colorbrewer.qualitative.Dark2_8.mpl_colors
colorlistbis = palettable.colorbrewer.qualitative.Paired_3.mpl_colors

_sample_order = [
    'Aguirre_EGSFR1982',
    "Aguirre_EGSFR2218",
    "CCG1153_4411",
    "Aguirre_EGSFR1938",
    "Aguirre_EGSFR0074",
    "Aguirre_EGSFR0128",
    "Aguirre_EGSFR1732",
    "Aguirre_EGSFR0148",
    "CCG1153_4496262",
    "CCG1153_6640539",
]
colormapping_pat = dict(zip(_sample_order, list(colorlist[:8]) + list(colorlistbis[:2])))

# Lymphoid

In [None]:
# Output folder for the lymphoid figures.
os.makedirs("figures/lymphoid",exist_ok=True)

# Independent copy of the cells whose high-level compartment is "Lymphoid".
subadata = adata[adata.obs["highlevel_refined"]=="Lymphoid"].copy()

In [None]:
# Number of cells per refined cell type in this compartment.
subadata.obs.refined_annotations.value_counts()

In [None]:
# One grey shade per cell type, in order of first appearance, starting at palette index 2.
# NOTE(review): Greys_9 has 9 colours, so i+2 supports at most 7 cell types.
colorlist = palettable.colorbrewer.sequential.Greys_9.mpl_colors
ctlist = subadata.obs.refined_annotations.unique()
colormapping = {ct: colorlist[i+2] for i,ct in enumerate(ctlist)}
# B cells are always given the last colour of the palette.
colormapping["B"] = colorlist[8]

In [None]:
# PCA on the compartment subset (scanpy defaults).
sc.tl.pca(subadata)

In [None]:
# Harmony integration across samples; the neighbours step below reads "X_pca_harmony".
# sigma is presumably forwarded to harmonypy — confirm against the scanpy/harmonypy docs.
sc.external.pp.harmony_integrate(subadata, sigma=0.5,
                                 key="sample_id", 
                                 max_iter_harmony=20)

In [None]:
# Neighbourhood graph built on the Harmony-corrected embedding.
sc.pp.neighbors(subadata, use_rep="X_pca_harmony")

In [None]:
# UMAP layout computed from that graph.
sc.tl.umap(subadata)

In [None]:
# UMAP coloured by sample, using the fixed per-sample colours.
fig= sc.pl.umap(subadata, color=["sample_id"], 
           ncols=1, frameon=False, palette=colormapping_pat, return_fig=True)
fig.savefig("figures/lymphoid/umap_sample_id.png",dpi=200,bbox_inches="tight")

In [None]:
# UMAP coloured by refined cell type, using the grey palette built above.
fig= sc.pl.umap(subadata, color=["refined_annotations"], 
           ncols=1, frameon=False, palette=colormapping, return_fig=True)
fig.savefig("figures/lymphoid/umap_annotations.png",dpi=200,bbox_inches="tight")

In [None]:
# Rank genes per refined cell type; the result is read back by the marker cells below.
sc.tl.rank_genes_groups(subadata, groupby="refined_annotations")

In [None]:
# Known lymphoid marker genes, grouped by the population they identify.
# The key order is the order of the gene groups in the dot plot.
markers = dict([
    ("T-cell", ["CD3E", "CD3G"]),
    ("Th", ["CD4"]),
    ("T cytotoxic", ["CD8B", "GZMA"]),
    ("Treg", ["FOXP3", "PDCD1", "TIGIT"]),
    ("NK", ["NKG7", "NCAM1"]),
    ("B", ["IGHA1"]),
])

In [None]:
# Dot plot of the known lymphoid markers per refined cell type.
# Expression is scaled per gene; dot size is capped at 0.5 via dot_max.
dotplot_kwargs = dict(
    var_names=markers,
    groupby="refined_annotations",
    standard_scale="var",
    dot_max=0.5,
    return_fig=True,
)
fig = sc.pl.dotplot(subadata, **dotplot_kwargs)
fig.savefig("figures/lymphoid/dotplot_knownmarkers.png", dpi=200, bbox_inches="tight")

In [None]:
# Top-10 marker genes per refined cell type, read from the rank_genes_groups
# result on subadata: mitochondrial genes ("MT-" prefix) are dropped and the
# remaining genes are ordered by descending score.
group_markers = {}
for ct in sorted(subadata.obs.refined_annotations.unique()):
    ranked = sc.get.rank_genes_groups_df(subadata, group=ct)
    non_mito = ranked.loc[~ranked.names.str.startswith("MT-")]
    top_hits = non_mito.sort_values(by="scores", ascending=False).head(10)
    group_markers[ct] = top_hits.names.to_numpy()

In [None]:
# Dot plot of the data-driven markers (top genes per cell type), scaled per gene.
dotplot_kwargs = dict(
    var_names=group_markers,
    groupby="refined_annotations",
    standard_scale="var",
    return_fig=True,
)
fig = sc.pl.dotplot(subadata, **dotplot_kwargs)
fig.savefig("figures/lymphoid/dotplot_groupmarkers.png", dpi=200, bbox_inches="tight")

In [None]:
# Per-sample composition: fraction of each refined cell type within every sample.
patlevel_counts = subadata.obs[["sample_id","refined_annotations"]]
patlevel_counts = patlevel_counts.groupby(by="sample_id").value_counts(normalize=True)
# Convert to whole percentages. Scale first and round afterwards: round(2)*100
# yields values such as 28.999999999999996, which the int() used for the bar
# labels then truncates to 28.
patlevel_counts = (patlevel_counts * 100).round()

# Wide table (rows = samples, columns = cell types); absent cell types become 0.
df = patlevel_counts.unstack(level=-1, fill_value=0)

# Reorder rows to follow the clinical table, keeping only samples present here.
df = df.loc[clinical.index.intersection(df.index)]

In [None]:
def add_clinical_info(ax, groups=(("NT", 0, 1), ("Primary", 2, 3), ("Metastatic", 4, 8)),
                      y=100, h=10, col='k'):
    """Draw a labelled bracket above each group of adjacent bars.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes holding the bar chart; it is drawn on in place.
    groups : iterable of (label, first, last)
        Text label plus the 0-based positions of the first and last bar the
        bracket spans. The default reproduces the layout hard-coded for this
        section (NT: 0-1, Primary: 2-3, Metastatic: 4-8).
        NOTE(review): the positions are not derived from the data; confirm
        them against the row order of the plotted table.
    y : number
        Height at which the bracket legs start.
    h : number
        Length of the bracket legs; the label sits at ``y + 1.3 * h``.
    col : colour
        Colour of both the bracket and its label.

    Returns
    -------
    The same ``ax`` that was passed in.
    """
    for label, x1, x2 in groups:
        # Bracket: up from the left edge, across the group, down at the right edge.
        ax.plot([x1-0.3, x1-0.3, x2+0.3, x2+0.3], [y, y+h, y+h, y], lw=1.5, c=col)
        # Label centred over the group, just above the bracket.
        ax.text((x1+x2)*.5, y+1.3*h, label, ha='center', va='bottom', color=col)

    return ax

In [None]:
# Stacked bar chart of the per-sample cell-type percentages in `df`
# (rows = samples, columns = cell types), coloured with `colormapping`.
fig, ax = plt.subplots(1, 1, figsize=(6, 3))
df.plot(kind="bar", stacked=True, color=colormapping, ax=ax)
ax.legend(bbox_to_anchor=(1.05, 1), frameon=False)
ax.spines[["right", "top"]].set_visible(False)
ax.set_xlabel("")
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax = add_clinical_info(ax)

# Label each segment with its whole-number percentage; segments below 5 stay blank.
# round() before int(): int() alone truncates, so 28.999999999999996 would print as 28.
for container in ax.containers:
    labels = [
        int(round(bar.get_height())) if bar.get_height() >= 5 else ""
        for bar in container
    ]
    # No fmt argument: explicit label texts are supplied, so no value formatting is needed.
    ax.bar_label(container, labels=labels, label_type="center", color="white")
fig.savefig("figures/lymphoid/barplot_pat_tme_scaled.png", dpi=300, bbox_inches="tight")

# Myeloid

In [None]:
# Output folder for the myeloid figures.
os.makedirs("figures/myeloid",exist_ok=True)

# Independent copy of the cells whose high-level compartment is "Myeloid".
subadata = adata[adata.obs["highlevel_refined"]=="Myeloid"].copy()

In [None]:
# Merge TAM3 into TAM2 for this compartment-level analysis.
subadata.obs.refined_annotations = subadata.obs.refined_annotations.replace({"TAM3":"TAM2"})

In [None]:
# Number of cells per refined cell type after the merge.
subadata.obs["refined_annotations"].value_counts()

In [None]:
# One green shade per cell type, in order of first appearance, starting at palette index 1.
# NOTE(review): Greens_9 has 9 colours, so i+1 supports at most 8 cell types.
colorlist = palettable.colorbrewer.sequential.Greens_9.mpl_colors
ctlist = subadata.obs.refined_annotations.unique()
colormapping = {ct: colorlist[i+1] for i,ct in enumerate(ctlist)}

In [None]:
# PCA on the compartment subset (scanpy defaults).
sc.tl.pca(subadata)

In [None]:
# Harmony integration across samples; the neighbours step below reads "X_pca_harmony".
# sigma is presumably forwarded to harmonypy — confirm against the scanpy/harmonypy docs.
sc.external.pp.harmony_integrate(subadata, 
                                 key="sample_id", 
                                 max_iter_harmony=20, sigma=0.1)

In [None]:
# Neighbourhood graph built on the Harmony-corrected embedding.
sc.pp.neighbors(subadata, use_rep="X_pca_harmony")

In [None]:
# UMAP layout computed from that graph.
sc.tl.umap(subadata)

In [None]:
# UMAP coloured by sample, using the fixed per-sample colours.
fig= sc.pl.umap(subadata, color=["sample_id"], 
           ncols=1, frameon=False,  palette=colormapping_pat, return_fig=True)
fig.savefig("figures/myeloid/umap_sample_id.png",dpi=200,bbox_inches="tight")

In [None]:
# UMAP coloured by refined cell type, using the green palette built above.
fig= sc.pl.umap(subadata, color=["refined_annotations"], 
           ncols=1, frameon=False, palette=colormapping, return_fig=True)
fig.savefig("figures/myeloid/umap_annotations.png",dpi=200,bbox_inches="tight")

In [None]:
# Rank genes per refined cell type; the result is read back by the marker cells below.
sc.tl.rank_genes_groups(subadata, groupby="refined_annotations")

In [None]:
# Top-10 marker genes per refined cell type, read from the rank_genes_groups
# result on subadata: mitochondrial genes ("MT-" prefix) are dropped and the
# remaining genes are ordered by descending score.
group_markers = {}
for ct in sorted(subadata.obs.refined_annotations.unique()):
    ranked = sc.get.rank_genes_groups_df(subadata, group=ct)
    non_mito = ranked.loc[~ranked.names.str.startswith("MT-")]
    top_hits = non_mito.sort_values(by="scores", ascending=False).head(10)
    group_markers[ct] = top_hits.names.to_numpy()

In [None]:
# Dot plot of the data-driven markers (top genes per cell type), scaled per gene;
# saved as both PNG and SVG.
dotplot_kwargs = dict(
    var_names=group_markers,
    groupby="refined_annotations",
    standard_scale="var",
    return_fig=True,
)
fig = sc.pl.dotplot(subadata, **dotplot_kwargs)
fig.savefig("figures/myeloid/dotplot_groupmarkers.png", dpi=200, bbox_inches="tight")
fig.savefig("figures/myeloid/dotplot_groupmarkers.svg", dpi=200, bbox_inches="tight")

In [None]:
# Known myeloid marker panels, keyed by the population they identify.
# The key order is the order of the gene groups in the dot plot. Several panels
# are shorter than the commented-out alternatives from the original cell,
# which are recorded next to them.
myeloid_known_markers = {
    "Mast": ["KIT"],                       # alternative: "KIT","TPSAB1","CPA3"
    "pDC": ["LILRB4", "GZMA", "IL3RA"],    # alternative: "LILRA4","GZMB","IL3RA"
    "cDC1": ["CLEC9A", "FLT3", "IDO1"],
    "cDC2": ["HLA-DQA1"],                  # alternative: "CD1C","FCER1A","HLA-DQA1"
    "cDC3": ["LAMP3", "CCR7", "FSCN1"],
    "Mono_CD14": ["FCN1", "S100A8", "S100A9"],
    "Mono_CD16": ["FCGR3A", "LST1", "LILRB2"],
    "Macro_INHBA": ["INHBA", "IL1RN", "CCL4"],
    "Macro_NLRP3": ["NLRP3", "EREG"],      # alternative: "NLRP3","EREG","IL1B"
    "Macro_LYVE1": ["LYVE1", "PLTP", "SELENOP"],
    "Macro_C1QC": ["C1QC", "C1QA", "APOE"],
    "Kupffer-like": ["MARCO", "VSIG4", "TIMD4"],
    "Mono/Macro": ["CD68", "CD163"],
    "Mono-to-Macro": ["CLEC5A"],
    "TRM": ["ITGAM", "ADGRE1", "MERTK", "FCGR1A"],
}

# Dot plot of those panels per refined cell type, scaled per gene; saved as PNG and SVG.
fig = sc.pl.dotplot(
    subadata,
    var_names=myeloid_known_markers,
    groupby="refined_annotations",
    standard_scale="var",
    return_fig=True,
)
fig.savefig("figures/myeloid/dotplot_knownmarkers.png", dpi=200, bbox_inches="tight")
fig.savefig("figures/myeloid/dotplot_knownmarkers.svg", dpi=200, bbox_inches="tight")

In [None]:
# Per-sample composition: fraction of each refined cell type within every sample.
patlevel_counts = subadata.obs[["sample_id","refined_annotations"]]
patlevel_counts = patlevel_counts.groupby(by="sample_id").value_counts(normalize=True)
# Convert to whole percentages. Scale first and round afterwards: round(2)*100
# yields values such as 28.999999999999996, which the int() used for the bar
# labels then truncates to 28.
patlevel_counts = (patlevel_counts * 100).round()

# Wide table (rows = samples, columns = cell types); absent cell types become 0.
df = patlevel_counts.unstack(level=-1, fill_value=0)

# Reorder rows to follow the clinical table, keeping only samples present here.
df = df.loc[clinical.index.intersection(df.index)]

In [None]:
def add_clinical_info(ax, groups=(("NT", 0, 1), ("Primary", 2, 3), ("Metastatic", 4, 8)),
                      y=100, h=10, col='k'):
    """Draw a labelled bracket above each group of adjacent bars.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes holding the bar chart; it is drawn on in place.
    groups : iterable of (label, first, last)
        Text label plus the 0-based positions of the first and last bar the
        bracket spans. The default reproduces the layout hard-coded for this
        section (NT: 0-1, Primary: 2-3, Metastatic: 4-8).
        NOTE(review): the positions are not derived from the data; confirm
        them against the row order of the plotted table.
    y : number
        Height at which the bracket legs start.
    h : number
        Length of the bracket legs; the label sits at ``y + 1.3 * h``.
    col : colour
        Colour of both the bracket and its label.

    Returns
    -------
    The same ``ax`` that was passed in.
    """
    for label, x1, x2 in groups:
        # Bracket: up from the left edge, across the group, down at the right edge.
        ax.plot([x1-0.3, x1-0.3, x2+0.3, x2+0.3], [y, y+h, y+h, y], lw=1.5, c=col)
        # Label centred over the group, just above the bracket.
        ax.text((x1+x2)*.5, y+1.3*h, label, ha='center', va='bottom', color=col)

    return ax

In [None]:
# Stacked bar chart of the per-sample cell-type percentages in `df`
# (rows = samples, columns = cell types), coloured with `colormapping`.
fig, ax = plt.subplots(1, 1, figsize=(6, 3))
df.plot(kind="bar", stacked=True, color=colormapping, ax=ax)
ax.legend(bbox_to_anchor=(1.05, 1), frameon=False)
ax.spines[["right", "top"]].set_visible(False)
ax.set_xlabel("")
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax = add_clinical_info(ax)

# Label each segment with its whole-number percentage; segments below 5 stay blank.
# round() before int(): int() alone truncates, so 28.999999999999996 would print as 28.
for container in ax.containers:
    labels = [
        int(round(bar.get_height())) if bar.get_height() >= 5 else ""
        for bar in container
    ]
    # No fmt argument: explicit label texts are supplied, so no value formatting is needed.
    ax.bar_label(container, labels=labels, label_type="center", color="white")
fig.savefig("figures/myeloid/barplot_pat_tme_scaled.png", dpi=300, bbox_inches="tight")
fig.savefig("figures/myeloid/barplot_pat_tme_scaled.svg", dpi=300, bbox_inches="tight")

In [None]:
# Export the full ranked-gene table of every myeloid cell type, one CSV per type.
# The output folder is created first; to_csv does not create missing directories.
# The path is a placeholder and must be replaced with a local location.
marker_dir = "/add/path/here/marker_genes/myeloid"
os.makedirs(marker_dir, exist_ok=True)

myeloid_markers = {}
for ct in group_markers.keys():
    myeloid_markers[ct] = sc.get.rank_genes_groups_df(subadata, group=ct)
    myeloid_markers[ct].to_csv(f"{marker_dir}/{ct}.csv")

# Endothelial 

In [None]:
# Output folder for the endothelial figures.
os.makedirs("figures/endothelial",exist_ok=True)

# Independent copy of the cells whose high-level compartment is "Endothelial".
subadata = adata[adata.obs["highlevel_refined"]=="Endothelial"].copy()

In [None]:
# Relabel cells carrying the generic "Endothelial" annotation as "Quiescent EC".
subadata.obs.refined_annotations = subadata.obs.refined_annotations.replace({"Endothelial": "Quiescent EC"})

In [None]:
# Number of cells per refined cell type after the relabelling.
subadata.obs.refined_annotations.value_counts()

In [None]:
# Every other shade of a 9-colour red-purple palette, in order of first appearance.
# NOTE(review): 2*i+1 stays within the palette only for up to 4 cell types;
# a fifth label would raise an IndexError.
colorlist = palettable.colorbrewer.sequential.RdPu_9.mpl_colors
ctlist = subadata.obs.refined_annotations.unique()
colormapping = {ct: colorlist[2*i+1] for i,ct in enumerate(ctlist)}

In [None]:
# PCA on the compartment subset (scanpy defaults).
sc.tl.pca(subadata)

In [None]:
# Harmony integration across samples; the neighbours step below reads "X_pca_harmony".
sc.external.pp.harmony_integrate(subadata, 
                                 key="sample_id", 
                                 max_iter_harmony=20)

In [None]:
# Neighbourhood graph built on the Harmony-corrected embedding.
sc.pp.neighbors(subadata, use_rep="X_pca_harmony")

In [None]:
# UMAP layout computed from that graph.
sc.tl.umap(subadata)

In [None]:
# UMAP coloured by sample, using the fixed per-sample colours.
fig= sc.pl.umap(subadata, color=["sample_id"], 
           ncols=1, frameon=False,  palette=colormapping_pat, return_fig=True)
fig.savefig("figures/endothelial/umap_sample_id.png",dpi=200,bbox_inches="tight")

In [None]:
# UMAP coloured by refined cell type, using the palette built above.
fig= sc.pl.umap(subadata, color=["refined_annotations"], 
           ncols=1, frameon=False, palette=colormapping, return_fig=True)
fig.savefig("figures/endothelial/umap_annotations.png",dpi=200,bbox_inches="tight")

In [None]:
# Rank genes per refined cell type; the result is read back by the marker cells below.
sc.tl.rank_genes_groups(subadata, groupby="refined_annotations")

In [None]:
# Top-10 marker genes per refined cell type, read from the rank_genes_groups
# result on subadata: mitochondrial genes ("MT-" prefix) are dropped and the
# remaining genes are ordered by descending score.
group_markers = {}
for ct in sorted(subadata.obs.refined_annotations.unique()):
    ranked = sc.get.rank_genes_groups_df(subadata, group=ct)
    non_mito = ranked.loc[~ranked.names.str.startswith("MT-")]
    top_hits = non_mito.sort_values(by="scores", ascending=False).head(10)
    group_markers[ct] = top_hits.names.to_numpy()

In [None]:
# Dot plot of the data-driven markers (top genes per cell type), scaled per gene.
dotplot_kwargs = dict(
    var_names=group_markers,
    groupby="refined_annotations",
    standard_scale="var",
    return_fig=True,
)
fig = sc.pl.dotplot(subadata, **dotplot_kwargs)
fig.savefig("figures/endothelial/dotplot_groupmarkers.png", dpi=200, bbox_inches="tight")

In [None]:
# Per-sample composition: fraction of each refined cell type within every sample.
patlevel_counts = subadata.obs[["sample_id","refined_annotations"]]
patlevel_counts = patlevel_counts.groupby(by="sample_id").value_counts(normalize=True)
# Convert to whole percentages. Scale first and round afterwards: round(2)*100
# yields values such as 28.999999999999996, which the int() used for the bar
# labels then truncates to 28.
patlevel_counts = (patlevel_counts * 100).round()

# Wide table (rows = samples, columns = cell types); absent cell types become 0.
df = patlevel_counts.unstack(level=-1, fill_value=0)

# Reorder rows to follow the clinical table, keeping only samples present here.
df = df.loc[clinical.index.intersection(df.index)]

In [None]:
def add_clinical_info(ax, groups=(("NT", 0, 1), ("Primary", 2, 4), ("Metastatic", 5, 9)),
                      y=100, h=10, col='k'):
    """Draw a labelled bracket above each group of adjacent bars.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes holding the bar chart; it is drawn on in place.
    groups : iterable of (label, first, last)
        Text label plus the 0-based positions of the first and last bar the
        bracket spans. The default reproduces the layout hard-coded for this
        section (NT: 0-1, Primary: 2-4, Metastatic: 5-9).
        NOTE(review): the positions are not derived from the data; confirm
        them against the row order of the plotted table.
    y : number
        Height at which the bracket legs start.
    h : number
        Length of the bracket legs; the label sits at ``y + 1.3 * h``.
    col : colour
        Colour of both the bracket and its label.

    Returns
    -------
    The same ``ax`` that was passed in.
    """
    for label, x1, x2 in groups:
        # Bracket: up from the left edge, across the group, down at the right edge.
        ax.plot([x1-0.3, x1-0.3, x2+0.3, x2+0.3], [y, y+h, y+h, y], lw=1.5, c=col)
        # Label centred over the group, just above the bracket.
        ax.text((x1+x2)*.5, y+1.3*h, label, ha='center', va='bottom', color=col)

    return ax

In [None]:
# Stacked bar chart of the per-sample cell-type percentages in `df`
# (rows = samples, columns = cell types), coloured with `colormapping`.
fig, ax = plt.subplots(1, 1, figsize=(6, 3))
df.plot(kind="bar", stacked=True, color=colormapping, ax=ax)
ax.legend(bbox_to_anchor=(1.05, 1), frameon=False)
ax.spines[["right", "top"]].set_visible(False)
ax.set_xlabel("")
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax = add_clinical_info(ax)

# Label each segment with its whole-number percentage; segments below 5 stay blank.
# round() before int(): int() alone truncates, so 28.999999999999996 would print as 28.
for container in ax.containers:
    labels = [
        int(round(bar.get_height())) if bar.get_height() >= 5 else ""
        for bar in container
    ]
    # No fmt argument: explicit label texts are supplied, so no value formatting is needed.
    ax.bar_label(container, labels=labels, label_type="center", color="white")
fig.savefig("figures/endothelial/barplot_pat_tme_scaled.png", dpi=300, bbox_inches="tight")

# Muscle/adipocytes

In [None]:
# Output folder for the muscle/adipocyte figures.
os.makedirs("figures/muscle",exist_ok=True)

# Independent copy of the cells whose high-level compartment is "Muscle" or "Stromal/Muscle".
subadata = adata[adata.obs["highlevel_refined"].isin(["Muscle","Stromal/Muscle"])].copy()

In [None]:
# Relabel cells whose refined annotation is "Stromal/Muscle" as "Adipocytes".
# NOTE(review): "Stromal/Muscle" is the high-level name that highlevel_refined assigns
# to "Adipocytes"; confirm that refined_annotations actually contains this label.
subadata.obs.refined_annotations = subadata.obs.refined_annotations.replace({"Stromal/Muscle": "Adipocytes"})

In [None]:
# Number of cells per refined cell type after the relabelling.
subadata.obs.refined_annotations.value_counts()

In [None]:
# One yellow-orange-brown shade per cell type, in order of first appearance.
# NOTE(review): YlOrBr_4 has 4 colours, so i+1 supports at most 3 cell types.
colorlist = palettable.colorbrewer.sequential.YlOrBr_4.mpl_colors
ctlist = subadata.obs.refined_annotations.unique()
colormapping = {ct: colorlist[i+1] for i,ct in enumerate(ctlist)}

In [None]:
# PCA on the compartment subset (scanpy defaults).
sc.tl.pca(subadata)

In [None]:
# Harmony integration across samples; the neighbours step below reads "X_pca_harmony".
sc.external.pp.harmony_integrate(subadata, 
                                 key="sample_id", 
                                 max_iter_harmony=20)

In [None]:
# Neighbourhood graph built on the Harmony-corrected embedding.
sc.pp.neighbors(subadata, use_rep="X_pca_harmony")

In [None]:
# UMAP layout computed from that graph.
sc.tl.umap(subadata)

In [None]:
# UMAP coloured by sample, using the fixed per-sample colours.
fig= sc.pl.umap(subadata, color=["sample_id"], 
           ncols=1, frameon=False,  palette=colormapping_pat, return_fig=True)
fig.savefig("figures/muscle/umap_sample_id.png",dpi=200,bbox_inches="tight")

In [None]:
# UMAP coloured by refined cell type, using the palette built above.
fig= sc.pl.umap(subadata, color=["refined_annotations"], 
           ncols=1, frameon=False, palette=colormapping, return_fig=True)
fig.savefig("figures/muscle/umap_annotations.png",dpi=200,bbox_inches="tight")

In [None]:
# Rank genes per refined cell type; the result is read back by the marker cells below.
sc.tl.rank_genes_groups(subadata, groupby="refined_annotations")

In [None]:
# Top-10 marker genes per refined cell type, read from the rank_genes_groups
# result on subadata: mitochondrial genes ("MT-" prefix) are dropped and the
# remaining genes are ordered by descending score.
group_markers = {}
for ct in sorted(subadata.obs.refined_annotations.unique()):
    ranked = sc.get.rank_genes_groups_df(subadata, group=ct)
    non_mito = ranked.loc[~ranked.names.str.startswith("MT-")]
    top_hits = non_mito.sort_values(by="scores", ascending=False).head(10)
    group_markers[ct] = top_hits.names.to_numpy()

In [None]:
# Dot plot of the data-driven markers (top genes per cell type), scaled per gene.
dotplot_kwargs = dict(
    var_names=group_markers,
    groupby="refined_annotations",
    standard_scale="var",
    return_fig=True,
)
fig = sc.pl.dotplot(subadata, **dotplot_kwargs)
fig.savefig("figures/muscle/dotplot_groupmarkers.png", dpi=200, bbox_inches="tight")

In [None]:
# Per-sample composition: fraction of each refined cell type within every sample.
patlevel_counts = subadata.obs[["sample_id","refined_annotations"]]
patlevel_counts = patlevel_counts.groupby(by="sample_id").value_counts(normalize=True)
# Convert to whole percentages. Scale first and round afterwards: round(2)*100
# yields values such as 28.999999999999996, which the int() used for the bar
# labels then truncates to 28.
patlevel_counts = (patlevel_counts * 100).round()

# Wide table (rows = samples, columns = cell types); absent cell types become 0.
df = patlevel_counts.unstack(level=-1, fill_value=0)

# Reorder rows to follow the clinical table, keeping only samples present here.
df = df.loc[clinical.index.intersection(df.index)]

In [None]:
def add_clinical_info(ax, groups=(("NT", 0, 1), ("Primary", 2, 4), ("Metastatic", 5, 9)),
                      y=100, h=10, col='k'):
    """Draw a labelled bracket above each group of adjacent bars.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes holding the bar chart; it is drawn on in place.
    groups : iterable of (label, first, last)
        Text label plus the 0-based positions of the first and last bar the
        bracket spans. The default reproduces the layout hard-coded for this
        section (NT: 0-1, Primary: 2-4, Metastatic: 5-9).
        NOTE(review): the positions are not derived from the data; confirm
        them against the row order of the plotted table.
    y : number
        Height at which the bracket legs start.
    h : number
        Length of the bracket legs; the label sits at ``y + 1.3 * h``.
    col : colour
        Colour of both the bracket and its label.

    Returns
    -------
    The same ``ax`` that was passed in.
    """
    for label, x1, x2 in groups:
        # Bracket: up from the left edge, across the group, down at the right edge.
        ax.plot([x1-0.3, x1-0.3, x2+0.3, x2+0.3], [y, y+h, y+h, y], lw=1.5, c=col)
        # Label centred over the group, just above the bracket.
        ax.text((x1+x2)*.5, y+1.3*h, label, ha='center', va='bottom', color=col)

    return ax

In [None]:
# Stacked bar chart of the per-sample cell-type percentages in `df`
# (rows = samples, columns = cell types), coloured with `colormapping`.
fig, ax = plt.subplots(1, 1, figsize=(6, 3))
df.plot(kind="bar", stacked=True, color=colormapping, ax=ax)
ax.legend(bbox_to_anchor=(1.05, 1), frameon=False)
ax.spines[["right", "top"]].set_visible(False)
ax.set_xlabel("")
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax = add_clinical_info(ax)

# Label each segment with its whole-number percentage; segments below 5 stay blank.
# round() before int(): int() alone truncates, so 28.999999999999996 would print as 28.
for container in ax.containers:
    labels = [
        int(round(bar.get_height())) if bar.get_height() >= 5 else ""
        for bar in container
    ]
    # No fmt argument: explicit label texts are supplied, so no value formatting is needed.
    ax.bar_label(container, labels=labels, label_type="center", color="white")
fig.savefig("figures/muscle/barplot_pat_tme_scaled.png", dpi=300, bbox_inches="tight")

# Fibroblasts

In [None]:
# Output folder for the fibroblast figures.
os.makedirs("figures/fibroblast",exist_ok=True)

# Independent copy of the cells whose high-level compartment is "Fibroblast".
subadata = adata[adata.obs["highlevel_refined"].isin(["Fibroblast"])].copy()

In [None]:
# Number of cells per refined cell type in this compartment.
subadata.obs.refined_annotations.value_counts()

In [None]:
# One orange shade per cell type, in order of first appearance, starting at palette index 1.
# NOTE(review): Oranges_5 has 5 colours, so i+1 supports at most 4 cell types.
colorlist = palettable.colorbrewer.sequential.Oranges_5.mpl_colors
ctlist = subadata.obs.refined_annotations.unique()
colormapping = {ct: colorlist[i+1] for i,ct in enumerate(ctlist)}

In [None]:
# PCA on the compartment subset (scanpy defaults).
sc.tl.pca(subadata)

In [None]:
# Harmony integration across samples; the neighbours step below reads "X_pca_harmony".
sc.external.pp.harmony_integrate(subadata, 
                                 key="sample_id", 
                                 max_iter_harmony=20)

In [None]:
# Neighbourhood graph built on the Harmony-corrected embedding.
sc.pp.neighbors(subadata, use_rep="X_pca_harmony")

In [None]:
# UMAP layout computed from that graph.
sc.tl.umap(subadata)

In [None]:
# UMAP coloured by sample, using the fixed per-sample colours.
fig= sc.pl.umap(subadata, color=["sample_id"], 
           ncols=1, frameon=False,  palette=colormapping_pat, return_fig=True)
fig.savefig("figures/fibroblast/umap_sample_id.png",dpi=200,bbox_inches="tight")

In [None]:
# UMAP coloured by refined cell type, using the orange palette built above.
fig= sc.pl.umap(subadata, color=["refined_annotations"], 
           ncols=1, frameon=False, palette=colormapping, return_fig=True)
fig.savefig("figures/fibroblast/umap_annotations.png",dpi=200,bbox_inches="tight")

In [None]:
# UMAP panels coloured by RUNX1 and RUNX2, side by side.
# NOTE(review): `palette` is a cell-type colour dict; for gene colouring it is
# presumably ignored by scanpy — confirm, and use color_map if a colour scale is wanted.
fig= sc.pl.umap(subadata, color=["RUNX1","RUNX2"], 
           ncols=2, frameon=False, palette=colormapping, return_fig=True)
fig.savefig("figures/fibroblast/runx1-runx2-umap.png",dpi=200,bbox_inches="tight")

In [None]:
# UMAP panels coloured by HGF and ETS1, side by side (same palette caveat as above).
fig= sc.pl.umap(subadata, color=["HGF","ETS1"], 
           ncols=2, frameon=False, palette=colormapping, return_fig=True)
fig.savefig("figures/fibroblast/hgf-ets1-umap.png",dpi=200,bbox_inches="tight")

In [None]:
# Rank genes per refined cell type; the result is read back by the marker cells below.
sc.tl.rank_genes_groups(subadata, groupby="refined_annotations")

In [None]:
# Top-10 marker genes per refined cell type, read from the rank_genes_groups
# result on subadata: mitochondrial genes ("MT-" prefix) are dropped and the
# remaining genes are ordered by descending score.
group_markers = {}
for ct in sorted(subadata.obs.refined_annotations.unique()):
    ranked = sc.get.rank_genes_groups_df(subadata, group=ct)
    non_mito = ranked.loc[~ranked.names.str.startswith("MT-")]
    top_hits = non_mito.sort_values(by="scores", ascending=False).head(10)
    group_markers[ct] = top_hits.names.to_numpy()

In [None]:
# Dot plot of the data-driven markers (top genes per cell type), scaled per gene;
# saved as both PNG and SVG.
dotplot_kwargs = dict(
    var_names=group_markers,
    groupby="refined_annotations",
    standard_scale="var",
    return_fig=True,
)
fig = sc.pl.dotplot(subadata, **dotplot_kwargs)
fig.savefig("figures/fibroblast/dotplot_groupmarkers.png", dpi=200, bbox_inches="tight")
fig.savefig("figures/fibroblast/dotplot_groupmarkers.svg", dpi=200, bbox_inches="tight")

In [None]:
# CAF subtype marker panels; genes from https://www.nature.com/articles/s41467-022-34395-2
# The key order is the order of the gene groups in the dot plot.
caf_known_markers = {
    "CAFinfla": ["FAP", "COL1A1", "TGFB1", "MMP11"],
    "CAFmyo": ["RGS5", "MYH11", "ACTA2"],
    "CAFadi": ["CFD", "PTGDS", "FBLN1"],
    "CAFendMT": ["PLVAP", "RAMP2", "FLT1"],
    "CAFpn": ["S100B", "GPM6B", "NRXN1"],
    "CAFap": ["HLA-DRA", "LYZ"],
    "Fibroblast": ["PDGFRA", "PDGFRB", "THY1"],
}

# Dot plot of those panels per refined cell type, scaled per gene; saved as PNG and SVG.
fig = sc.pl.dotplot(
    subadata,
    var_names=caf_known_markers,
    groupby="refined_annotations",
    standard_scale="var",
    return_fig=True,
)
fig.savefig("figures/fibroblast/dotplot_knownmarkers.png", dpi=200, bbox_inches="tight")
fig.savefig("figures/fibroblast/dotplot_knownmarkers.svg", dpi=200, bbox_inches="tight")

In [None]:
# Export the full ranked-gene table of every fibroblast cell type, one CSV per type.
# The output folder is created first; to_csv does not create missing directories.
# The path is a placeholder and must be replaced with a local location.
marker_dir = "/add/path/here/marker_genes/fibroblast"
os.makedirs(marker_dir, exist_ok=True)

fibroblast_markers = {}
for ct in group_markers.keys():
    fibroblast_markers[ct] = sc.get.rank_genes_groups_df(subadata, group=ct)
    fibroblast_markers[ct].to_csv(f"{marker_dir}/{ct}.csv")

In [None]:
# Per-sample composition: fraction of each refined cell type within every sample.
patlevel_counts = subadata.obs[["sample_id","refined_annotations"]]
patlevel_counts = patlevel_counts.groupby(by="sample_id").value_counts(normalize=True)
# Convert to whole percentages. Scale first and round afterwards: round(2)*100
# yields values such as 28.999999999999996, which the int() used for the bar
# labels then truncates to 28.
patlevel_counts = (patlevel_counts * 100).round()

# Wide table (rows = samples, columns = cell types); absent cell types become 0.
df = patlevel_counts.unstack(level=-1, fill_value=0)

# Reorder rows to follow the clinical table, keeping only samples present here.
df = df.loc[clinical.index.intersection(df.index)]

In [None]:
def add_clinical_info(ax, groups=(("NT", 0, 1), ("Primary", 2, 4), ("Metastatic", 5, 9)),
                      y=100, h=10, col='k'):
    """Draw a labelled bracket above each group of adjacent bars.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes holding the bar chart; it is drawn on in place.
    groups : iterable of (label, first, last)
        Text label plus the 0-based positions of the first and last bar the
        bracket spans. The default reproduces the layout hard-coded for this
        section (NT: 0-1, Primary: 2-4, Metastatic: 5-9).
        NOTE(review): the positions are not derived from the data; confirm
        them against the row order of the plotted table.
    y : number
        Height at which the bracket legs start.
    h : number
        Length of the bracket legs; the label sits at ``y + 1.3 * h``.
    col : colour
        Colour of both the bracket and its label.

    Returns
    -------
    The same ``ax`` that was passed in.
    """
    for label, x1, x2 in groups:
        # Bracket: up from the left edge, across the group, down at the right edge.
        ax.plot([x1-0.3, x1-0.3, x2+0.3, x2+0.3], [y, y+h, y+h, y], lw=1.5, c=col)
        # Label centred over the group, just above the bracket.
        ax.text((x1+x2)*.5, y+1.3*h, label, ha='center', va='bottom', color=col)

    return ax

In [None]:
# Stacked bar chart of the per-sample cell-type percentages in `df`
# (rows = samples, columns = cell types), coloured with `colormapping`.
fig, ax = plt.subplots(1, 1, figsize=(6, 3))
df.plot(kind="bar", stacked=True, color=colormapping, ax=ax)
ax.legend(bbox_to_anchor=(1.05, 1), frameon=False)
ax.spines[["right", "top"]].set_visible(False)
ax.set_xlabel("")
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax = add_clinical_info(ax)

# Label each segment with its whole-number percentage; segments below 5 stay blank.
# round() before int(): int() alone truncates, so 28.999999999999996 would print as 28.
for container in ax.containers:
    labels = [
        int(round(bar.get_height())) if bar.get_height() >= 5 else ""
        for bar in container
    ]
    # No fmt argument: explicit label texts are supplied, so no value formatting is needed.
    ax.bar_label(container, labels=labels, label_type="center", color="white")
fig.savefig("figures/fibroblast/barplot_pat_tme_scaled.png", dpi=300, bbox_inches="tight")
fig.savefig("figures/fibroblast/barplot_pat_tme_scaled.svg", dpi=300, bbox_inches="tight")

# Epithelial

In [None]:
# Output folder for the epithelial figures.
os.makedirs("figures/epithelial",exist_ok=True)

# Independent copy of the cells whose high-level compartment is "Epithelial".
subadata = adata[adata.obs["highlevel_refined"].isin(["Epithelial"])].copy()

In [None]:
# Number of cells per refined cell type in this compartment.
subadata.obs.refined_annotations.value_counts()

In [None]:
# One blue shade per cell type, in order of first appearance, starting at palette index 1.
# NOTE(review): Blues_3 has 3 colours, so i+1 supports at most 2 cell types.
colorlist = palettable.colorbrewer.sequential.Blues_3.mpl_colors
ctlist = subadata.obs.refined_annotations.unique()
colormapping = {ct: colorlist[i+1] for i,ct in enumerate(ctlist)}

In [None]:
# PCA on the compartment subset (scanpy defaults).
sc.tl.pca(subadata)

In [None]:
# NOTE(review): this neighbour graph looks redundant. Neighbours are computed again
# on "X_pca_harmony" two cells below, before any step that uses the graph, and the
# other compartments have no such call. Confirm and consider removing this cell.
sc.pp.neighbors(subadata)

In [None]:
# Harmony integration across samples; the neighbours step below reads "X_pca_harmony".
sc.external.pp.harmony_integrate(subadata, 
                                 key="sample_id", 
                                 max_iter_harmony=20)

In [None]:
# Neighbourhood graph built on the Harmony-corrected embedding.
sc.pp.neighbors(subadata, use_rep="X_pca_harmony")

In [None]:
# UMAP layout computed from that graph.
sc.tl.umap(subadata)

In [None]:
# UMAP coloured by sample, using the fixed per-sample colours.
fig= sc.pl.umap(subadata, color=["sample_id"], 
           ncols=1, frameon=False, palette=colormapping_pat, return_fig=True)
fig.savefig("figures/epithelial/umap_sample_id.png",dpi=200,bbox_inches="tight")

In [None]:
# UMAP coloured by refined cell type, using the blue palette built above.
fig= sc.pl.umap(subadata, color=["refined_annotations"], 
           ncols=1, frameon=False, palette=colormapping, return_fig=True)
fig.savefig("figures/epithelial/umap_annotations.png",dpi=200,bbox_inches="tight")

In [None]:
# Rank genes per refined cell type; the result is read back by the marker cells below.
sc.tl.rank_genes_groups(subadata, groupby="refined_annotations")

In [None]:
# Top-10 marker genes per refined cell type, read from the rank_genes_groups
# result on subadata: mitochondrial genes ("MT-" prefix) are dropped and the
# remaining genes are ordered by descending score.
group_markers = {}
for ct in sorted(subadata.obs.refined_annotations.unique()):
    ranked = sc.get.rank_genes_groups_df(subadata, group=ct)
    non_mito = ranked.loc[~ranked.names.str.startswith("MT-")]
    top_hits = non_mito.sort_values(by="scores", ascending=False).head(10)
    group_markers[ct] = top_hits.names.to_numpy()

In [None]:
# Dot plot of the data-driven markers (top genes per cell type), scaled per gene.
dotplot_kwargs = dict(
    var_names=group_markers,
    groupby="refined_annotations",
    standard_scale="var",
    return_fig=True,
)
fig = sc.pl.dotplot(subadata, **dotplot_kwargs)
fig.savefig("figures/epithelial/dotplot_groupmarkers.png", dpi=200, bbox_inches="tight")

In [None]:
# Per-sample composition: fraction of each refined cell type within every sample.
patlevel_counts = subadata.obs[["sample_id","refined_annotations"]]
patlevel_counts = patlevel_counts.groupby(by="sample_id").value_counts(normalize=True)
# Convert to whole percentages. Scale first and round afterwards: round(2)*100
# yields values such as 28.999999999999996, which the int() used for the bar
# labels then truncates to 28.
patlevel_counts = (patlevel_counts * 100).round()

# Wide table (rows = samples, columns = cell types); absent cell types become 0.
df = patlevel_counts.unstack(level=-1, fill_value=0)

# Reorder rows to follow the clinical table, keeping only samples present here.
df = df.loc[clinical.index.intersection(df.index)]

In [None]:
def add_clinical_info(ax, groups=(("NT", 0, 1), ("Primary", 2, 2), ("Metastatic", 3, 5)),
                      y=100, h=10, col='k'):
    """Draw a labelled bracket above each group of adjacent bars.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes holding the bar chart; it is drawn on in place.
    groups : iterable of (label, first, last)
        Text label plus the 0-based positions of the first and last bar the
        bracket spans. The default reproduces the layout hard-coded for this
        section (NT: 0-1, Primary: 2, Metastatic: 3-5).
        NOTE(review): the positions are not derived from the data; confirm
        them against the row order of the plotted table.
    y : number
        Height at which the bracket legs start.
    h : number
        Length of the bracket legs; the label sits at ``y + 1.3 * h``.
    col : colour
        Colour of both the bracket and its label.

    Returns
    -------
    The same ``ax`` that was passed in.
    """
    for label, x1, x2 in groups:
        # Bracket: up from the left edge, across the group, down at the right edge.
        ax.plot([x1-0.3, x1-0.3, x2+0.3, x2+0.3], [y, y+h, y+h, y], lw=1.5, c=col)
        # Label centred over the group, just above the bracket.
        ax.text((x1+x2)*.5, y+1.3*h, label, ha='center', va='bottom', color=col)

    return ax

In [None]:
# Stacked bar chart of the per-sample cell-type percentages in `df`
# (rows = samples, columns = cell types), coloured with `colormapping`.
fig, ax = plt.subplots(1, 1, figsize=(6, 3))
df.plot(kind="bar", stacked=True, color=colormapping, ax=ax)
ax.legend(bbox_to_anchor=(1.05, 1), frameon=False)
ax.spines[["right", "top"]].set_visible(False)
ax.set_xlabel("")
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax = add_clinical_info(ax)

# Label each segment with its whole-number percentage; segments below 5 stay blank.
# round() before int(): int() alone truncates, so 28.999999999999996 would print as 28.
for container in ax.containers:
    labels = [
        int(round(bar.get_height())) if bar.get_height() >= 5 else ""
        for bar in container
    ]
    # No fmt argument: explicit label texts are supplied, so no value formatting is needed.
    ax.bar_label(container, labels=labels, label_type="center", color="white")
fig.savefig("figures/epithelial/barplot_pat_tme_scaled.png", dpi=300, bbox_inches="tight")