In [None]:
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import palettable


import pathlib as pl

from tqdm.notebook import tqdm

In [None]:
from scipy.stats import fisher_exact

In [None]:
def pretty_ax(ax):
    """Apply the notebook's minimal axis styling to ``ax`` in place.

    The top and right spines are hidden, the bottom and left spines are drawn
    with a line width of 1.5, and ``tick_params`` is called for both axes and
    both major and minor ticks with bottom ticks on, top and left ticks off,
    and bottom and left tick labels on.

    Parameters
    ----------
    ax
        Object exposing ``spines`` (indexable by side name) and
        ``tick_params`` -- a matplotlib Axes in this notebook.

    Returns
    -------
    None
    """
    for hidden_side in ("right", "top"):
        ax.spines[hidden_side].set_visible(False)

    tick_settings = dict(
        axis="both",
        which="both",
        bottom=True,
        top=False,
        left=False,
        labelbottom=True,
        labelleft=True,
    )
    ax.tick_params(**tick_settings)

    for kept_side in ("bottom", "left"):
        ax.spines[kept_side].set_linewidth(1.5)

# Load data

In [None]:
# Read the full-cohort object from an .h5ad file; the path is a placeholder
# that must be replaced with the real location before running.
adata = sc.read_h5ad("/add/path/here/full_cohort.h5ad")

In [None]:
# Per-sample clinical table; the first CSV column becomes the index.
clinical = pd.read_csv("/add/path/here/EAC_clinical_info.csv", index_col=0)
# NOTE(review): treatment_mapping is defined here but not used in this cell.
treatment_mapping = {"Neoadjuvant CROSS": "Neoadj. chemo", "Neoadjuvent carboplatin": "Neoadj. chemo"}

# A sample is flagged metastatic when it is a tumor whose site mentions "metastasis".
# The trailing space in "Yes " presumably mirrors the raw CSV values -- verify.
metastatic = (clinical["Tumor?"] == "Yes ") & (clinical["Site"].str.contains("metastasis"))
metastatic.name = "Metastatic?"
clinical["Metastatic?"] = metastatic

# Coarse anatomical location derived from the free-text site.
clinical["Location"] = clinical["Site"].replace({"GEJ": "Esophagus/GEJ",
                                                 "Esophagus": "Esophagus/GEJ"})
# Write through .loc with a boolean mask: chained indexing
# (clinical["Location"][mask] = ...) may assign to a temporary copy, triggers
# SettingWithCopyWarning and does nothing under pandas copy-on-write.
# na=False keeps the mask strictly boolean when a site is missing.
clinical.loc[clinical["Location"].str.contains("Liver", na=False), "Location"] = "Liver"
clinical.loc[clinical["Location"].str.contains("Adrenal", na=False), "Location"] = "Adrenal gland"
clinical.loc[clinical["Location"].str.contains("Peritoneal", na=False), "Location"] = "Peritoneum"

# Collapse the free-text grade/stage descriptions into a short stage label;
# values not listed in the mapping are left unchanged.
clinical["Stage"] = clinical["Grade/stage"].replace({"Stage IV ": "IV", "Stage IV": "IV",
                                                     "Moderately differentiated; ypT1aN0": "I",
                                                     "Moderately differentiated; pT1aN0": "I",
                                                     "Poorly differentiated; ypT2N0": "II",
                                                     "Presented with stage III became stage IV during esophagectomy when pleural metastases were identified": "III/IV"})

# Treatment is assigned by row position, in the row order of the CSV
# (this happens before the sort below). One entry per sample (10).
clinical["Treatment"] = ["Neoadj. chemo",
                         "None",
                         "Neoadj. chemo + ICI + RT",
                         "None",
                         "None",
                         "Chemo + HER2 targeted + ICI",
                         "Neoadj. chemo + HER2 targeted",
                         "Neoadj. chemo + ICI",
                         "None",
                         "Neoadj. chemo + VEGFR2i"]

clinical["HER2 status"] = clinical["HER2"].replace({"HER 2 1+": "1+/equivocal"})

clinical = clinical.sort_values(by=["Tumor?", "Metastatic?", "Location"])

# PD-L1 CPS is also assigned by row position, but AFTER the sort, so these
# values follow the sorted row order rather than the CSV order.
# NOTE(review): confirm each positional list was written against the matching row order.
clinical["PD-L1 CPS score"] = [0, 2, 7, 2, 3, 24, 0, 8, 3, 15]

In [None]:
# Two per-cell annotation tables, both indexed by their first CSV column.
refined_annotations = pd.read_csv(
    "/add/path/here/refined_annotations.csv",
    index_col=0,
)
refined_wcancer = pd.read_csv(
    "/add/path/here/refined_wCNMF_programs_and_sampleid.csv",
    index_col=0,
)

# The annotation table is expected to hold exactly one column; give it a
# fixed name (the assignment raises if the column count differs).
refined_annotations.columns = ["refined_annotations"]

In [None]:
# Fixed colour per patient: the first eight patients take the Dark2 colours in
# order, the last two take the first two Paired colours, and "NA" is whitesmoke.
colorlist = palettable.colorbrewer.qualitative.Dark2_8.mpl_colors
colorlistbis = palettable.colorbrewer.qualitative.Paired_3.mpl_colors
colormapping_pat = {
    **dict(zip(
        [
            "Aguirre_EGSFR1982",
            "Aguirre_EGSFR2218",
            "CCG1153_4411",
            "Aguirre_EGSFR1938",
            "Aguirre_EGSFR0074",
            "Aguirre_EGSFR0128",
            "Aguirre_EGSFR1732",
            "Aguirre_EGSFR0148",
        ],
        colorlist,
    )),
    **dict(zip(["CCG1153_4496262", "CCG1153_6640539"], colorlistbis)),
    "NA": "whitesmoke",
}

# snRNA-seq

In [None]:
# Maps each refined annotation label to a coarser, high-level cell-type label.
# The mapping is applied with Series.replace further down, so any label that
# is not a key here is left unchanged.
# NOTE(review): "Adipocytes" is the only key mapped to "Stromal/Muscle", while
# the two muscle labels map to "Muscle" -- confirm this separate category is intended.
# NOTE(review): both "Quiescent endothelial cells" and "Quiescent EC" are listed;
# presumably two spellings of the same label -- verify against the annotation file.
highlevel_refined = {"Hepatocyte": "Epithelial", 
                     "Carcinoma": "Carcinoma", 
                     "Fibroblast": "Fibroblast", 
                     "Quiescent endothelial cells": "Endothelial", 
                     "Smooth muscle": "Muscle", 
                     "Skeletal muscle": "Muscle",
                     "TAM2": "Myeloid", "TAM3": "Myeloid",
                     "TCD4": "Lymphoid", 
                     "Inflammatory CAF": "Fibroblast", 
                     "Adipose CAF": "Fibroblast",
                     "HGF-CAF": "Fibroblast",
                     "TAM1": "Myeloid", 
                     "Myeloid-HighMT": "Unknown/technical", 
                     "Angiogenic EC": "Endothelial", 
                     "Quiescent EC": "Endothelial", 
                     "Venous EC": "Endothelial",
                     "TCD8": "Lymphoid", 
                     "B": "Lymphoid", 
                     "DC": "Myeloid", 
                     "Hepatic EC": "Endothelial", 
                     "Kupffer cells": "Myeloid", 
                     "NK": "Lymphoid", 
                     "Treg": "Lymphoid", 
                     "StrMus-HighMT": "Unknown/technical", 
                     "T-HighMT": "Unknown/technical", 
                     "Mast": "Myeloid", 
                     "Adipocytes": "Stromal/Muscle", 
                     "Endo-HighMT": "Unknown/technical"}

In [None]:
# Attach the external per-cell annotations to adata.obs, aligned on the cell index.
# Assigning reindexed columns (instead of replacing adata.obs with an outer-join
# pd.concat) leaves the obs row order and length untouched, so rows cannot drift
# out of sync with the expression matrix. It also makes the cell safe to re-run:
# a second run overwrites the columns rather than appending duplicates.
# Cells absent from an annotation table get NaN.
adata.obs["refined_annotations"] = refined_annotations["refined_annotations"].reindex(adata.obs_names)
adata.obs["refined_wcancer"] = refined_wcancer["refined_wcancer"].reindex(adata.obs_names)

# Collapse the refined labels into high-level cell types; labels without an
# entry in highlevel_refined are kept as they are.
adata.obs["highlevel_refined"] = adata.obs["refined_annotations"].replace(highlevel_refined)

In [None]:
# One colour per high-level cell type, taken from a 10-colour "colorblind"
# palette in order of first appearance in adata.obs; "NA" gets whitesmoke.
colorlist = sns.color_palette("colorblind", n_colors=10)
ctlist = adata.obs["highlevel_refined"].unique()
colormapping = {
    celltype: colorlist[position]
    for position, celltype in enumerate(ctlist)
}
colormapping.update({"NA": "whitesmoke"})

In [None]:
# Transcription factors examined in this notebook: they are removed from the
# cNMF signature gene lists before scoring and their expression is plotted
# against the malignant states below.
mTFs = ['KLF5', 'ELF3', 'SMAD3', 'TCF7L2', 'HMGA2', "BNC2"]

## Subset of malignant cells

In [None]:
# Colours for the malignant states: cNMF_1..cNMF_5 take the five Set1 colours
# in order; outlier and undefined carcinoma cells are drawn in grey.
colorlist = palettable.colorbrewer.qualitative.Set1_5.mpl_colors
colormapping_mal = {f"cNMF_{rank + 1}": colorlist[rank] for rank in range(5)}
colormapping_mal.update({"cNMF_Outlier": "grey", "Carcinoma_undefined": "grey"})

In [None]:
# Keep only the cells whose high-level label is "Carcinoma"; .copy() makes
# the subset an independent object rather than a view of adata.
subadata = adata[adata.obs.highlevel_refined=="Carcinoma"].copy()

In [None]:
# Embedding of the carcinoma subset: PCA -> Harmony integration -> neighbour graph -> UMAP.
# The steps are order-dependent: each one consumes what the previous one stored.
sc.tl.pca(subadata)
# Harmony is keyed on "sample_id", starts from the "X_pca" basis and runs for at most 20 iterations.
sc.external.pp.harmony_integrate(subadata, key="sample_id", basis="X_pca", max_iter_harmony=20)
# The neighbour graph is built on the Harmony-adjusted representation, not on the raw PCA.
sc.pp.neighbors(subadata, use_rep="X_pca_harmony")
sc.tl.umap(subadata)

## Score states

In [None]:
import signaturescoring as ssc

# Load the marker-gene table of every cNMF program, keyed by program number;
# the first CSV column (used as gene identifiers below) becomes the index.
marker_genes = {}
for cl in ["1","2","3","4","5"]:
    marker_genes[cl] = pd.read_csv(f"/add/path/here/cNMF_{cl}.csv",index_col=0)

# Score each program on the carcinoma subset. The signature is the index of the
# first 100 rows of the program's table with the mTFs removed, so the state
# scores are not driven by the TFs they are later compared against.
# np.setdiff1d accepts the Index directly (no deprecated Index.ravel() call)
# and returns the sorted unique gene names.
# Each score is written under the name "cNMF_<program>_score".
for prog in marker_genes:
    ssc.score_signature(adata=subadata,
                        gene_list=list(np.setdiff1d(marker_genes[prog].head(100).index, mTFs)),
                        method="adjusted_neighborhood_scoring",
                        ctrl_size=150,
                        score_name=f"cNMF_{prog}_score")

In [None]:
# Score a four-gene TF signature on the carcinoma subset with the same method
# and control size as the cNMF program scores; stored as "mTF_score".
ssc.score_signature(
    adata=subadata,
    gene_list=["KLF5", "ELF3", "SMAD3", "TCF7L2"],
    method="adjusted_neighborhood_scoring",
    ctrl_size=150,
    score_name="mTF_score",
)

In [None]:
# Display the pairwise correlation matrix of the cNMF_1, cNMF_3 and cNMF_4 scores across carcinoma cells.
subadata.obs[["cNMF_1_score","cNMF_3_score","cNMF_4_score"]].corr()

In [None]:
# Dense per-cell expression of the mTFs (toarray() assumes X is stored as a
# sparse matrix), joined with each cell's malignant-state label.
TF_expr = pd.DataFrame(subadata[:,mTFs].X.copy().toarray(),index=subadata.obs_names,columns=mTFs)

TF_expr = pd.concat([TF_expr,subadata.obs["refined_wcancer"]],axis=1)

# One boxplot panel per TF, comparing its expression across four malignant states.
fig, ax = plt.subplots(2,4, figsize=(15,8))
flatax=ax.flatten()
for i,tf in enumerate(mTFs):
    sns.boxplot(data=TF_expr,x="refined_wcancer",
                y=tf,ax=flatax[i], palette=colormapping_mal,order=["Carcinoma_undefined","cNMF_1","cNMF_3","cNMF_4"])
    flatax[i].set_xticklabels(["Carc. Undef.", "cNMF_1", "cNMF_3", "cNMF_4"], rotation=45, ha="right")
    flatax[i].set_xlabel("")
    flatax[i].spines[["top","right"]].set_visible(False)
# The grid has more panels than TFs: hide every panel that received no boxplot,
# not just the last one, so no empty frame is left in the figure.
for unused_ax in flatax[len(mTFs):]:
    unused_ax.axis("off")
fig.tight_layout()
# NOTE(review): this path is relative to the working directory, whereas the other
# figures in this notebook are saved under "../clean_code/figures/malignant/" -- confirm.
fig.savefig("figures/malignant/mTFs_expr_perstate.png",dpi=300,bbox_inches="tight")

In [None]:
# Rebuild TF_expr: dense mTF expression per carcinoma cell, side by side with
# the cNMF_1 / cNMF_3 / cNMF_4 state scores of the same cells.
TF_expr = pd.concat(
    [
        pd.DataFrame(
            subadata[:, mTFs].X.copy().toarray(),
            index=subadata.obs_names,
            columns=mTFs,
        ),
        subadata.obs[["cNMF_1_score", "cNMF_3_score", "cNMF_4_score"]],
    ],
    axis=1,
)

In [None]:
# Correlation of each state score (rows, in the order cNMF_3, cNMF_1, cNMF_4)
# with the expression of each mTF (columns), taken from the full correlation matrix.
heatmap_df = TF_expr.corr().loc[["cNMF_3_score","cNMF_1_score","cNMF_4_score"],mTFs]

In [None]:
# Display the pairwise correlation of the mTFs' expression with one another.
TF_expr[mTFs].corr()

In [None]:
# ELF3 expression against the cNMF_3 score: small, faint points plus the fitted regression line.
fig, ax = plt.subplots(figsize=(2, 2))
sns.regplot(
    x="cNMF_3_score",
    y="ELF3",
    data=TF_expr,
    scatter_kws=dict(s=2, alpha=0.1),
    ax=ax,
)
pretty_ax(ax)

In [None]:
# HMGA2 expression against the cNMF_4 score: small, faint points plus the fitted regression line.
fig, ax = plt.subplots(figsize=(2, 2))
sns.regplot(
    x="cNMF_4_score",
    y="HMGA2",
    data=TF_expr,
    scatter_kws=dict(s=2, alpha=0.1),
    ax=ax,
)
pretty_ax(ax)

In [None]:
# KLF5 expression against the cNMF_3 score: small, faint points plus the fitted regression line.
fig, ax = plt.subplots(figsize=(2, 2))
sns.regplot(
    x="cNMF_3_score",
    y="KLF5",
    data=TF_expr,
    scatter_kws=dict(s=2, alpha=0.1),
    ax=ax,
)
pretty_ax(ax)

In [None]:
# Annotated heatmap of the state-score vs. TF-expression correlations held in
# heatmap_df, on a diverging colour map centred at 0; rows are relabelled
# without the "_score" suffix.
# NOTE: heatmap_df is reassigned by a later cell, so this cell must run before it.
fig, ax = plt.subplots(figsize=(4, 2))
sns.heatmap(
    heatmap_df,
    annot=heatmap_df,
    fmt=".2f",
    cmap="vlag",
    center=0,
    ax=ax,
)
ax.set_yticklabels(["cNMF_3", "cNMF_1", "cNMF_4"])
fig.savefig(
    "../clean_code/figures/malignant/heatmap_cNMF_TF_corr.png",
    dpi=300,
    bbox_inches="tight",
)

In [None]:
# Distribution of the mTF score within each malignant state; common_norm=False
# normalises every state's density on its own so small states stay visible.
df = subadata.obs[["refined_wcancer","mTF_score"]]
ax = sns.kdeplot(data=df, x="mTF_score", hue="refined_wcancer", palette=colormapping_mal, common_norm=False)
# Reposition seaborn's own legend outside the axes. Calling ax.legend() here
# would rebuild the legend from labelled artists, which seaborn's hue-based
# distribution plots do not provide, and the state legend would be lost.
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1), frameon=False)
pretty_ax(ax)
ax.figure.savefig("../clean_code/figures/malignant/kdeplot_cNMF_mTF_score.svg", dpi=200, bbox_inches="tight")

In [None]:
# One-row heatmap: correlation of the mTF score with each of the three state scores.
df = subadata.obs[["cNMF_3_score", "cNMF_1_score", "cNMF_4_score", "mTF_score"]]
# Row = mTF_score; columns = every other column of df (the three state scores, in order).
heatmap_df = df.corr().loc[["mTF_score"], df.columns[:-1]]

fig, ax = plt.subplots(figsize=(2, 0.5))
sns.heatmap(heatmap_df, annot=heatmap_df, cmap="vlag", center=0, ax=ax)
ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
# Column labels are the score names with the trailing "_score" removed.
ax.set_xticklabels(
    [column[: -len("_score")] for column in heatmap_df.columns],
    rotation=45,
    ha="right",
)
fig.savefig(
    "../clean_code/figures/malignant/heatmap_cNMF_mTFscore_corr.png",
    dpi=300,
    bbox_inches="tight",
)