In [None]:
import pandas as pd 
import numpy as np
import scanpy as sc
import pathlib as pl

import seaborn as sns
import matplotlib.pyplot as plt

from statannotations.Annotator import Annotator

from lifelines import KaplanMeierFitter
from lifelines.statistics import logrank_test

from tqdm.notebook import tqdm

In [None]:
def pretty_ax(ax):
    """Apply the notebook's minimal styling to a matplotlib Axes.

    The top and right spines are hidden, the bottom and left spines are drawn
    with a line width of 1.5, and ticks are configured so that only bottom
    tick marks are drawn while bottom and left tick labels remain visible.
    """
    for hidden_side in ("right", "top"):
        ax.spines[hidden_side].set_visible(False)

    tick_settings = dict(
        axis='both',
        which='both',
        bottom=True,
        top=False,
        left=False,
        labelbottom=True,
        labelleft=True,
    )
    ax.tick_params(**tick_settings)

    for kept_side in ("bottom", "left"):
        ax.spines[kept_side].set_linewidth(1.5)

In [None]:
# Read the cell-cycle gene list (one entry per line). The context manager closes the
# file handle deterministically, and blank lines are skipped so that stray empty
# lines cannot end up as empty gene names.
with open('/add/path/here/regev_lab_cell_cycle_genes.txt') as gene_file:
    cell_cycle_genes = [line.strip() for line in gene_file if line.strip()]

# The first 43 entries are used as S-phase genes, the remainder as G2/M genes.
s_genes = cell_cycle_genes[:43]
g2m_genes = cell_cycle_genes[43:]

In [None]:
# Load the AnnData object that the rest of the notebook works on (placeholder path).
adata = sc.read_h5ad("/add/path/here/Carroll_EAC_raw.h5ad")

In [None]:
# Clinical table; the first CSV column becomes the index, and later cells look rows up
# by patient ID through that index.
clinical = pd.read_csv("/add/path/here/carroll_clinical.csv", index_col=0)

In [None]:
# Keep a copy of the input matrix in the "counts" layer before X is transformed.
# NOTE: this cell is not idempotent. Re-running it would store the already-transformed
# X as "counts" and normalise/log-transform a second time.
adata.layers["counts"] = adata.X.copy()

# Scale every cell to a total of 10,000 and log1p-transform X in place.
sc.pp.normalize_total(adata, target_sum=10000)
sc.pp.log1p(adata)

In [None]:
# Drop the .raw attribute. Presumably so that later scanpy calls fall back to the
# normalised adata.X instead of .raw (TODO confirm).
del adata.raw

In [None]:
signature_dir = pl.Path("/add/path/here/")

# Build {signature name: first 100 genes} from one CSV per signature; the signature
# name is the file stem. Assumes the rows of each CSV are already ranked (TODO confirm).
full_sigs = {}
# sorted() makes the iteration order reproducible. Hidden files and sub-directories
# (e.g. .DS_Store, .ipynb_checkpoints) are skipped instead of being parsed as CSV.
for sig_path in sorted(signature_dir.iterdir()):
    if not sig_path.is_file() or sig_path.name.startswith("."):
        continue
    ranked = pd.read_csv(sig_path, index_col=0)
    # Remove genes whose symbol starts with MT-, RPS or RPL before truncating.
    ranked = ranked[~ranked.index.str.startswith(("MT-", "RPS", "RPL"))]
    # tolist() instead of Index.ravel(): ravel() returns an ndarray on pandas < 2 and an
    # Index on pandas >= 2, whereas a plain list behaves the same everywhere.
    full_sigs[sig_path.stem] = ranked.head(100).index.tolist()

In [None]:
signature_dir2 = pl.Path("/add/path/here/")

# Build {signature name: first 100 genes} for the fibroblast signatures; gene symbols
# are taken from the "names" column of each CSV. Assumes the rows are already ranked
# (TODO confirm).
fibrosigs = {}
# sorted() makes the iteration order reproducible. Hidden files and sub-directories
# are skipped instead of being parsed as CSV.
for sig_path in sorted(signature_dir2.iterdir()):
    if not sig_path.is_file() or sig_path.name.startswith("."):
        continue
    ranked = pd.read_csv(sig_path, index_col=0).set_index("names")
    # Remove genes whose symbol starts with MT-, RPS or RPL before truncating.
    ranked = ranked[~ranked.index.str.startswith(("MT-", "RPS", "RPL"))]
    # tolist() instead of Index.ravel(): ravel() returns an ndarray on pandas < 2 and an
    # Index on pandas >= 2, whereas a plain list behaves the same everywhere.
    fibrosigs[sig_path.stem] = ranked.head(100).index.tolist()

In [None]:
# One column per state; each column lists entries to exclude when scoring that state
# (the scoring cell removes them from the signature).
toptfs = pd.read_csv("/add/path/here/toptfs_top20.csv",index_col=0)
# Keys are the first 6 characters of each column name (presumably "cNMF_k", matching the
# signature names; TODO confirm). to_numpy() replaces Series.ravel(), which is
# deprecated since pandas 2.2.
toptfs = {state[:6]: toptfs[state].dropna().to_numpy() for state in toptfs.columns}

In [None]:
# Score every signature in full_sigs per cell, after removing the entries listed for
# the same key in toptfs. The result is stored in adata.obs["<signature>_score"].
for sig in full_sigs:
    genes = full_sigs[sig]
    genes_without_tfs = np.setdiff1d(genes, toptfs[sig])
    sc.tl.score_genes(
        adata=adata,
        gene_list=list(genes_without_tfs),
        score_name=f"{sig}_score",
    )

In [None]:
# Score every fibroblast signature per cell using its full gene list; the result is
# stored in adata.obs["<signature>_score"].
for sig in fibrosigs:
    genes = fibrosigs[sig]
    sc.tl.score_genes(
        adata=adata,
        gene_list=genes,
        score_name=f"{sig}_score",
    )

In [None]:
# The condition label is the second "_"-separated field of the sample name.
adata.obs["condition"] = adata.obs["sample"].str.split("_").str[1]

In [None]:
# PCA on all cells with scanpy's default settings.
sc.tl.pca(adata)

In [None]:
# Neighbourhood graph followed by a UMAP embedding, both with default settings.
sc.pp.neighbors(adata)
sc.tl.umap(adata)

In [None]:
# UMAP of all cells coloured by the celltype annotation.
sc.pl.umap(adata, color=["celltype"])

# Subset carcinoma single cells

In [None]:
# Keep only cells annotated as "EAC"; .copy() detaches the subset from adata.
subadata = adata[adata.obs.celltype.isin(["EAC"])].copy()

In [None]:
# Exclude cells from the listed patients (the ESCC-/BARR- prefixed IDs).
subadata = subadata[~subadata.obs.patient.isin(["ESCC-LAFI",'BARR-3242', 'BARR-4845', 'BARR-4846','BARR-4988'])].copy()

In [None]:
# Cell-cycle scoring with the S and G2/M gene lists loaded above; the resulting
# S_score / G2M_score columns are read from subadata.obs further down.
sc.tl.score_genes_cell_cycle(subadata, s_genes=s_genes, g2m_genes=g2m_genes)

In [None]:
# Recompute PCA, neighbours and UMAP on the carcinoma subset (no batch correction yet).
sc.tl.pca(subadata)
sc.pp.neighbors(subadata)
sc.tl.umap(subadata)

In [None]:
# UMAP of the carcinoma subset before integration: one panel per cNMF score plus one
# panel coloured by patient.
score_keys = [f"cNMF_{k}_score" for k in range(1, 6)]
fig = sc.pl.umap(subadata,
                 color=score_keys + ["patient"],
                 ncols=2, frameon=False, return_fig=True)
# The score panels sit at every second position of fig.axes (presumably because each
# one is followed by its colourbar axes; TODO confirm), hence the stride of 2.
for i in range(5):
    fig.axes[2*i].set_title(f"cNMF$_{i+1}$ score")
fig.axes[-1].set_title("Patient ID")
# savefig does not create missing directories, so make sure the target folder exists.
out_path = pl.Path("figures/Carroll_unintegrated_cNMF_score_snRNA_umap.png")
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path, dpi=300, bbox_inches="tight")

In [None]:
# Harmony integration with patient as the batch key (at most 20 Harmony iterations).
# The corrected embedding is read back as obsm["X_pca_harmony"] in the following cells.
sc.external.pp.harmony_integrate(subadata, key="patient", max_iter_harmony=20)

In [None]:
# Rebuild the neighbour graph on the Harmony-corrected embedding and recompute the UMAP.
sc.pp.neighbors(subadata, use_rep="X_pca_harmony")
sc.tl.umap(subadata)

In [None]:
# UMAP of the carcinoma subset after Harmony integration: one panel per cNMF score plus
# one panel coloured by patient.
score_keys = [f"cNMF_{k}_score" for k in range(1, 6)]
fig = sc.pl.umap(subadata,
                 color=score_keys + ["patient"],
                 ncols=2, frameon=False, return_fig=True)
# The score panels sit at every second position of fig.axes (presumably because each
# one is followed by its colourbar axes; TODO confirm), hence the stride of 2.
for i in range(5):
    fig.axes[2*i].set_title(f"cNMF$_{i+1}$ score")
fig.axes[-1].set_title("Patient ID")
# savefig does not create missing directories, so make sure the target folder exists.
out_path = pl.Path("figures/Carroll_cNMF_score_snRNA_umap.png")
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path, dpi=300, bbox_inches="tight")

In [None]:
# Condition = second "_"-separated field of the sample name. The same column was already
# derived on adata.obs before subsetting, so this recomputes an existing column.
subadata.obs["condition"] = subadata.obs["sample"].str.split("_").str[1]

In [None]:
# UMAP of the integrated carcinoma subset, one panel per metadata column.
sc.pl.umap(
    subadata,
    color=['condition', "patient"],
    ncols=2,
    frameon=False,
)

In [None]:
# Pairwise correlation (DataFrame.corr default method) between the five cNMF scores
# across cells; displayed as the cell output.
subadata.obs[["cNMF_1_score","cNMF_2_score","cNMF_3_score","cNMF_4_score","cNMF_5_score"]].corr()

In [None]:
# Per-cell metadata restricted to the "EAC" / "EAC.Op" tissue labels.
tumour_tissues = ["EAC", "EAC.Op"]
df = subadata.obs.loc[subadata.obs["tissue"].isin(tumour_tissues)].copy()

# Plain strings so that patient IDs can be replaced by arbitrary labels below.
df["patient"] = df["patient"].astype(str)

# Patient ID -> clinical-benefit label; IDs without an entry in the lookup are left as-is.
benefit_lookup = clinical["Clinical_benefit"].to_dict()
df["Clinical Benefit"] = df["patient"].replace(benefit_lookup)

# These three patient IDs are relabelled "NA" explicitly.
unlabelled_patients = {'EAC-ODHL': "NA", 'EAC-IKIJ': "NA", 'EAC-JJHD': "NA"}
df["Clinical Benefit"] = df["Clinical Benefit"].replace(unlabelled_patients)

In [None]:
# Per-patient boxplots of the cNMF_5 score by condition, with Mann-Whitney annotations
# comparing PreTx and ICI-4W within each patient.
y="cNMF_5_score"
hue_order = ["PreTx","ICI-4W","PostTx"]
patient_order = ["EAC-ACMO","EAC-GDBD","EAC-JCNP","EAC-LPPN",
                 "EAC-HDHI","EAC-HENB","EAC-HGFI","EAC-PAHE",'EAC-ODHL', 'EAC-IKIJ','EAC-JJHD']

# Only annotate comparisons whose two groups are plotted and contain cells: a pair that
# refers to a patient outside `patient_order`, or to an empty group, breaks the
# annotation step.
observed_groups = set(zip(df["patient"], df["condition"]))
pairs = [((pat, "PreTx"), (pat, "ICI-4W"))
         for pat in patient_order
         if {(pat, "PreTx"), (pat, "ICI-4W")} <= observed_groups]

fig, ax = plt.subplots(1,1,figsize=(5,3))
sns.boxplot(data=df, x="patient", y=y,
            order=patient_order,
            hue="condition", hue_order=hue_order, ax=ax)
# Explicit Axes method instead of the pyplot "current axes" state.
ax.legend(frameon=False, bbox_to_anchor=(1,1,0,0), title="Condition")
pretty_ax(ax)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")

annotator = Annotator(ax, pairs, data=df, x="patient", y=y,
                      order=patient_order,
                      hue="condition", hue_order=hue_order)
annotator.configure(test='Mann-Whitney', text_format='simple', show_test_name=False, loc='inside')
annotator.apply_and_annotate()
# savefig does not create missing directories, so make sure the target folder exists.
out_path = pl.Path("figures/external/carroll_cNMF_5_link_CB_perpatient.svg")
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path, dpi=200, bbox_inches="tight")

In [None]:
# cNMF_5 score by clinical-benefit group and condition, with Mann-Whitney annotations
# for the listed within-group comparisons.
y="cNMF_5_score"
benefit_order = ["CB","NCB"]
hue_order = ["PreTx","ICI-4W","PostTx"]
pairs = [(("CB", "PreTx"), ("CB", "ICI-4W")),
         (("CB", "PreTx"), ("CB", "PostTx")),
         (("CB", "ICI-4W"), ("CB", "PostTx")),
         (("NCB", "PreTx"), ("NCB", "ICI-4W")),]


fig, ax = plt.subplots(1,1,figsize=(3,2))
sns.boxplot(data=df, x="Clinical Benefit", y=y,
            order=benefit_order,
            hue="condition", hue_order=hue_order, ax=ax)

# Explicit Axes method instead of the pyplot "current axes" state.
ax.legend(frameon=False, bbox_to_anchor=(1,1,0,0), title="Condition")
pretty_ax(ax)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")

annotator = Annotator(ax, pairs, data=df, x="Clinical Benefit", y=y,
                      order=benefit_order,
                      hue="condition", hue_order=hue_order)
annotator.configure(test='Mann-Whitney', text_format='simple', show_test_name=False, loc='inside')
annotator.apply_and_annotate()
# savefig does not create missing directories, so make sure the target folder exists.
out_path = pl.Path("figures/external/carroll_cNMF_5_link_CB.svg")
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path, dpi=200, bbox_inches="tight")

In [None]:
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as mcolors

# First two components of the Harmony-corrected embedding, one row per cell.
harmony_pcs = pd.DataFrame(
    subadata.obsm["X_pca_harmony"][:, :2],
    index=subadata.obs_names,
    columns=["PC1", "PC2"],
)

# Append the per-cell cNMF and cell-cycle scores from obs.
score_columns = ["cNMF_1_score", "cNMF_2_score",
                 "cNMF_3_score", "cNMF_4_score", "cNMF_5_score",
                 "S_score", "G2M_score"]
X_pca = pd.concat([harmony_pcs, subadata.obs[score_columns]], axis=1)

# Positional relabelling: the cNMF score columns get mathtext names, which the plotting
# helper also uses as panel titles.
X_pca.columns = ["PC1","PC2","cNMF$_{1}$","cNMF$_{2}$","cNMF$_{3}$","cNMF$_{4}$","cNMF$_{5}$","S_score","G2M_score"]

def plot_pcs_color(ax, state, data=None):
    """Scatter PC1 against PC2 on ``ax``, colouring points by ``data[state]``.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on; the colourbar is attached to this Axes' own figure.
    state : str
        Column used for the point colours; also used as the panel title.
    data : pandas.DataFrame, optional
        Table with "PC1", "PC2" and ``state`` columns. Defaults to the
        notebook-level ``X_pca`` table.
    """
    if data is None:
        data = X_pca
    values = data[state]

    # Diverging colour scale centred on 0. TwoSlopeNorm requires vmin < vcenter < vmax,
    # so the limits are nudged just past 0 when all values share the same sign; for
    # values that straddle 0 the limits are the plain min / max.
    vcenter = 0
    nudge = 1e-12
    vmin = min(values.min(), vcenter - nudge)
    vmax = max(values.max(), vcenter + nudge)
    normalize = mcolors.TwoSlopeNorm(vcenter=vcenter, vmin=vmin, vmax=vmax)
    colormap = matplotlib.colormaps['RdBu_r']

    sns.scatterplot(
        y=data["PC2"],
        x=data["PC1"],
        c=values,
        s=5,
        norm=normalize,
        cmap=colormap,
        ax=ax
    )

    scalar_mappable = cm.ScalarMappable(norm=normalize, cmap=colormap)
    scalar_mappable.set_array(values)
    # Use the figure that owns `ax` rather than a global `fig`, so the helper works no
    # matter which figure variable happens to be current in the notebook.
    ax.figure.colorbar(scalar_mappable, ax=ax)
    ax.set_title(state)
    pretty_ax(ax)

# One PC1/PC2 panel per cNMF score, laid out in a single row.
fig, ax = plt.subplots(1,5, figsize=(15,2))
flatax = ax.flatten()

for panel, k in zip(flatax, range(1, 6)):
    # Column names in X_pca are mathtext labels of the form "cNMF$_{k}$".
    plot_pcs_color(panel, f"cNMF$_{{{k}}}$")

# The original cell called tight_layout() twice in a row; one call is kept.
fig.tight_layout()
# savefig does not create missing directories, so make sure the target folder exists.
out_path = pl.Path("figures/Carroll_pca_harmony.png")
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path, dpi=200, bbox_inches="tight")

# Subset fibroblast cells

In [None]:
# Rebind subadata to the cells of the "Fibroblast_Mural" subcompartment. From here on
# the name no longer refers to the carcinoma subset.
subadata = adata[adata.obs.subcompartment.isin(["Fibroblast_Mural"])].copy()

In [None]:
# Exclude cells from the listed patients (same list as for the carcinoma subset).
subadata = subadata[~subadata.obs.patient.isin(["ESCC-LAFI",'BARR-3242', 'BARR-4845', 'BARR-4846','BARR-4988'])].copy()

In [None]:
# Recompute PCA, neighbours and UMAP on the fibroblast subset (no batch correction yet).
sc.tl.pca(subadata)
sc.pp.neighbors(subadata)
sc.tl.umap(subadata)

In [None]:
# UMAP of the fibroblast subset before integration: four signature scores plus the
# celltype and patient annotations.
caf_panels = ['Adipose CAF_score', 'HGF-CAF_score', 'Inflammatory CAF_score',
              'Fibroblast_score', "celltype", "patient"]
fig = sc.pl.umap(
    subadata,
    color=caf_panels,
    ncols=2,
    frameon=False,
    return_fig=True,
)

In [None]:
# Harmony integration with patient as the batch key (at most 20 Harmony iterations).
# The corrected embedding is read back as obsm["X_pca_harmony"] in the next cell.
sc.external.pp.harmony_integrate(subadata, key="patient", max_iter_harmony=20)

In [None]:
# Rebuild the neighbour graph on the Harmony-corrected embedding and recompute the UMAP.
sc.pp.neighbors(subadata, use_rep="X_pca_harmony")
sc.tl.umap(subadata)

In [None]:
# UMAP of the fibroblast subset after Harmony integration: four signature scores plus
# the celltype and patient annotations.
caf_panels = ['Adipose CAF_score', 'HGF-CAF_score', 'Inflammatory CAF_score',
              'Fibroblast_score', "celltype", "patient"]
fig = sc.pl.umap(
    subadata,
    color=caf_panels,
    ncols=2,
    frameon=False,
    return_fig=True,
)

In [None]:
# Per-cell metadata of the fibroblast subset, restricted to the "EAC" / "EAC.Op" tissue
# labels. This rebinds `df`, which previously held the carcinoma cells.
tumour_tissues = ["EAC", "EAC.Op"]
df = subadata.obs.loc[subadata.obs["tissue"].isin(tumour_tissues)].copy()

# Plain strings so that patient IDs can be replaced by arbitrary labels below.
df["patient"] = df["patient"].astype(str)

# Patient ID -> clinical-benefit label; IDs without an entry in the lookup are left as-is.
benefit_lookup = clinical["Clinical_benefit"].to_dict()
df["Clinical Benefit"] = df["patient"].replace(benefit_lookup)

# These three patient IDs are relabelled "NA" explicitly.
unlabelled_patients = {'EAC-ODHL': "NA", 'EAC-IKIJ': "NA", 'EAC-JJHD': "NA"}
df["Clinical Benefit"] = df["Clinical Benefit"].replace(unlabelled_patients)

In [None]:
# Per-patient boxplots of the inflammatory-CAF score by condition, with Mann-Whitney
# annotations comparing PreTx against each later condition within a patient.
y="Inflammatory CAF_score"
hue_order = ["PreTx","ICI-4W","PostTx"]
patient_order = ["EAC-ACMO","EAC-GDBD","EAC-JCNP","EAC-LPPN",
                 "EAC-HDHI","EAC-HENB","EAC-HGFI","EAC-PAHE",'EAC-ODHL', 'EAC-IKIJ','EAC-JJHD']

# Only annotate comparisons whose two groups are plotted and contain cells: a pair that
# refers to a patient outside `patient_order`, or to an empty group, breaks the
# annotation step.
observed_groups = set(zip(df["patient"], df["condition"]))
pairs = [((pat, "PreTx"), (pat, later))
         for later in ("ICI-4W", "PostTx")
         for pat in patient_order
         if {(pat, "PreTx"), (pat, later)} <= observed_groups]

fig, ax = plt.subplots(1,1,figsize=(6,3))
sns.boxplot(data=df, x="patient", y=y,
            order=patient_order,
            hue="condition", hue_order=hue_order, ax=ax)
# Explicit Axes method instead of the pyplot "current axes" state.
ax.legend(frameon=False, bbox_to_anchor=(1,1,0,0), title="Condition")
pretty_ax(ax)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")

annotator = Annotator(ax, pairs, data=df, x="patient", y=y,
                      order=patient_order,
                      hue="condition", hue_order=hue_order)
annotator.configure(test='Mann-Whitney', text_format='simple', show_test_name=False, loc='inside')
annotator.apply_and_annotate()
# savefig does not create missing directories, so make sure the target folder exists.
out_path = pl.Path("figures/external/carroll_fibroblast_link_CB_perpatient.svg")
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path, dpi=200, bbox_inches="tight")

In [None]:
# Inflammatory-CAF score by clinical-benefit group for the PreTx and ICI-4W conditions,
# with Mann-Whitney annotations for the within-group comparison.
y="Inflammatory CAF_score"
benefit_order = ["CB","NCB"]
hue_order = ["PreTx","ICI-4W"]
pairs = [(("CB", "PreTx"), ("CB", "ICI-4W")),
         (("NCB", "PreTx"), ("NCB", "ICI-4W")),]


fig, ax = plt.subplots(1,1,figsize=(3,2))
sns.boxplot(data=df, x="Clinical Benefit", y=y,
            order=benefit_order,
            hue="condition", hue_order=hue_order, ax=ax)

# Explicit Axes method instead of the pyplot "current axes" state.
ax.legend(frameon=False, bbox_to_anchor=(1,1,0,0), title="Condition")
pretty_ax(ax)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")

annotator = Annotator(ax, pairs, data=df, x="Clinical Benefit", y=y,
                      order=benefit_order,
                      hue="condition", hue_order=hue_order)
annotator.configure(test='Mann-Whitney', text_format='simple', show_test_name=False, loc='inside')
annotator.apply_and_annotate()
# savefig does not create missing directories, so make sure the target folder exists.
out_path = pl.Path("figures/external/carroll_fibroblast_link_CB.svg")
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path, dpi=200, bbox_inches="tight")

# Read bulk RNA-seq data

In [None]:
# Gene annotation table (placeholder path); the first CSV column becomes the index.
gencode = pd.read_csv("/add/path/here/gencode_v41_positions.csv", index_col=0)

# gene_id with everything after the first "." removed.
trimmed_ids = gencode["gene_id"].str.split(".").str[0]
gencode["gen_red"] = trimmed_ids

# Lookup from trimmed gene ID to gene name; for repeated IDs the last row wins.
mapping = dict(zip(gencode["gen_red"], gencode["gene_name"]))

In [None]:


# NOTE(review): `bulk` is not defined in any visible cell above (the lines that loaded it
# appear to be missing), so this cell raises NameError on a fresh kernel. TODO restore
# the loading step.
# The first five columns are kept as per-gene annotation (a "Length" column is read
# from it later).
gene_info = bulk.iloc[:,:5].copy()

# The remaining columns are kept as the per-sample matrix. Rebinding `bulk` makes this
# cell non-idempotent: re-running it drops five more columns.
bulk = bulk.iloc[:,5:].copy()

In [None]:
# Sample metadata parsed from the "_"-separated sample names:
# field 0 = patient, field 1 = condition, field 2 = tissue.
sample_fields = bulk.columns.str.split("_")
condition = pd.DataFrame(sample_fields.str[1], index=bulk.columns, columns=["condition"])
condition["patient"] = sample_fields.str[0]
condition["tissue"] = sample_fields.str[2]

In [None]:
# log1p of the per-sample column totals.
logcounts = np.log1p(bulk.sum())

# Divide each gene's values by its length, then rescale every sample to sum to 1e6.
length_scaled = bulk.div(gene_info["Length"], axis=0)
tpm = length_scaled / length_scaled.sum()*1000000

# Relabel genes through `mapping` and switch to samples-in-rows orientation.
tpm = tpm.rename(index=mapping).T

# Keep genes that are zero in at most half of the samples.
zeros_per_gene = (tpm == 0).sum()
tpm = tpm.loc[:, zeros_per_gene <= 0.5*tpm.shape[0]]

# Sample annotations parsed from the "_"-separated sample names.
sample_fields = tpm.index.str.split("_")
tpm["condition"] = sample_fields.str[1]
tpm["patient"] = sample_fields.str[0]
tpm["tissue"] = sample_fields.str[2]

# Restrict to samples whose tissue field is "Tumor".
is_tumor = tpm["tissue"] == "Tumor"
tpm = tpm[is_tumor]

# Survival analysis on PreTx tissue only

In [None]:
# NOTE(review): `std_tpm` is not defined in any visible cell above (only `tpm` is), so
# this cell raises NameError on a fresh kernel. Presumably a standardised version of
# `tpm`; TODO restore its definition.
# Keep only the samples whose condition is "PreTx".
red_tpm = std_tpm[std_tpm["condition"]=="PreTx"]

In [None]:
import gseapy as gp

# Expression matrix for ssGSEA: metadata columns removed, transposed so that genes are
# rows and samples are columns.
ssgsea_input = red_tpm.drop(["condition","patient","tissue"],axis=1).T


def _ssgsea_scores(gene_sets):
    """Run ssGSEA on ``ssgsea_input`` for ``gene_sets`` and return the transposed result.

    The index of the returned table is reduced to the part of each label before the
    first "_" (the patient field of the sample name).
    NOTE(review): this relies on ``res2d`` being a term-by-sample table, as in the
    original code; confirm against the installed gseapy version.
    """
    ss = gp.ssgsea(data=ssgsea_input,
                   # plain lists, independent of how the signature dicts were built
                   gene_sets={name: list(genes) for name, genes in gene_sets.items()},
                   outdir=None,
                   sample_norm_method='rank', # choose 'custom' will only use the raw value of `data`
                   no_plot=True)
    scores = ss.res2d.T
    scores.index = scores.index.str.split("_").str[0]
    return scores


state_score = _ssgsea_scores(full_sigs)

# NOTE(review): the original passed `caf_sigs`, a name that is not defined in any visible
# cell. `fibrosigs` is the only fibroblast/CAF signature dict built in this notebook and
# is assumed to be what was meant; confirm.
caf_score = _ssgsea_scores(fibrosigs)

In [None]:
# NOTE(review): `common_patients` was not defined in any visible cell. It is taken to be
# the patients present in both score tables and in the clinical table; confirm.
common_patients = (state_score.index
                   .intersection(caf_score.index)
                   .intersection(clinical.index))

# Side-by-side table of state scores, CAF scores and clinical variables per patient.
augclin = pd.concat([state_score.loc[common_patients],
                     caf_score.loc[common_patients],
                     clinical.loc[common_patients]],axis=1)

In [None]:
from lifelines import CoxPHFitter
# Univariate Cox models (one per cNMF state score) and a forest plot of the hazard ratios.
survstr="OS"
clin = augclin[["Age","Stage","Sex","Status",survstr]].copy()
# Missing survival times are filled with the median survival time.
clin[survstr] = clin[survstr].fillna(clin[survstr].median())
# Min-max scale age; encode sex and stage (IV vs. III*) as 0/1. These covariates are
# prepared here but are not part of the univariate models fitted below.
clin.Age = (clin.Age - clin.Age.min())/(clin.Age.max()- clin.Age.min())
clin.Sex = clin.Sex.replace({"Male": 0, "Female": 1})
clin.Stage = clin.Stage.replace({"IV": 1, "IIIB": 0, "IIIA": 0, "IIIC": 0})


cox_results = []
for state in ["cNMF_1","cNMF_2","cNMF_3","cNMF_4","cNMF_5"]:
    # z-score the state score so hazard ratios are per standard deviation
    expr = augclin[state]
    expr = (expr - expr.mean())/expr.std()
    cox_clin = pd.concat([clin[["Status",survstr]],expr],axis=1)

    cph = CoxPHFitter()
    # duration column follows `survstr` instead of a second hard-coded "OS"
    cph.fit(cox_clin, duration_col=survstr, event_col="Status")
    cox_results.append(cph.summary)

cox_results = pd.concat(cox_results)

# to_numpy() replaces Series.ravel(), which is deprecated since pandas 2.2.
ps = cox_results["p"].to_numpy()
x = cox_results["exp(coef)"].to_numpy()
lower = cox_results["exp(coef) lower 95%"].to_numpy()
upper = cox_results["exp(coef) upper 95%"].to_numpy()
# errorbar expects distances from the point estimate, not absolute bounds
ci = [x - lower, upper - x]
names = cox_results.index
# red: whole 95% CI above 1; blue: whole 95% CI below 1; black: CI contains 1
colorlist = ["red" if lo > 1 else ("blue" if hi < 1 else "black")
             for lo, hi in zip(lower, upper)]

fig, ax = plt.subplots(1,1,figsize=(3,3))
# Reversed y positions put the first state at the top of the plot.
ax.errorbar(x,np.arange(0,len(x))[::-1],
            xerr=ci, marker="s",
            linewidth=0,
            elinewidth=2,
            ecolor=colorlist,
            markerfacecolor="black",
            markeredgecolor="black")
ax.spines[["bottom","left"]].set_linewidth(2)
ax.spines[["top","right"]].set_visible(False)
# Reference line at hazard ratio 1, spanning the full height of the axes.
ax.axvline(1,linestyle="--",color="grey")
ax.set_yticks(np.arange(0,len(x)))
ax.set_yticklabels(["cNMF$_{1}$","cNMF$_{2}$","cNMF$_{3}$","cNMF$_{4}$","cNMF$_{5}$"][::-1])
# p-values are written at the right edge, in the same bottom-to-top order as the ticks.
for i,p in enumerate(ps[::-1]):
    ax.text(ax.get_xlim()[1], i, f"p={p:.2e}")

# savefig does not create missing directories, so make sure the target folder exists.
out_path = pl.Path("figures/survival_carroll_cox_plot.svg")
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path, dpi=200, bbox_inches="tight")