In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns

from statannotations.Annotator import Annotator

In [None]:
import pathlib as pl

In [None]:
from sklearn.preprocessing import StandardScaler
from lifelines import CoxPHFitter

In [None]:
import itertools

In [None]:
def pretty_ax(ax):
    """Apply the notebook's shared axis styling to ``ax``.

    Hides the top and right spines, sets the bottom and left spines to a
    line width of 1.5, and configures the ticks so that bottom tick marks are
    drawn, top and left tick marks are switched off, and the bottom and left
    tick labels stay visible.
    """
    for hidden_side in ("right", "top"):
        ax.spines[hidden_side].set_visible(False)

    tick_settings = {
        "axis": "both",
        "which": "both",
        "bottom": True,
        "top": False,
        "left": False,
        "labelbottom": True,
        "labelleft": True,
    }
    ax.tick_params(**tick_settings)

    for kept_side in ("bottom", "left"):
        ax.spines[kept_side].set_linewidth(1.5)

# Load TCGA data

In [None]:
# Directory holding the shared TCGA data.
tcga_dir = pl.Path(
    "/cluster/work/boeva/scRNAdata/TCGA_data/"
)

In [None]:
# Directory from which every TCGA table in this notebook is read.
eac_tcga_dir = pl.Path(
    "/cluster/work/boeva/jyates/EAC_singlecell/auxiliary_data/tcga/"
)

In [None]:
# ESCA clinical matrix: tab-separated, first column used as the row index.
clinical_full = pd.read_csv(
    eac_tcga_dir / "TCGA.ESCA.sampleMap_ESCA_clinicalMatrix",
    sep="\t",
    index_col=0,
)

In [None]:
# Additional clinical table; the first line of the file is skipped before the
# header is parsed, and the first column becomes the row index.
clinical_nature = pd.read_csv(
    eac_tcga_dir / "ESCA_Nature_clinicalinfo.csv",
    skiprows=1,
    index_col=0,
)

In [None]:
# HRD table, transposed after loading so that the file's columns become rows.
hrd_score = pd.read_csv(
    eac_tcga_dir / "TCGA.HRD_withSampleID.txt.gz",
    sep="\t",
    index_col=0,
).transpose()

In [None]:
# Immune subtype table: tab-separated, first column used as the row index.
immune_subtype = pd.read_csv(
    eac_tcga_dir / "Subtype_Immune_Model_Based.txt.gz",
    sep="\t",
    index_col=0,
)

In [None]:
# Expression table, transposed so that the file's columns become rows.
fpkm = pd.read_csv(
    eac_tcga_dir / "TCGA-ESCA.htseq_fpkm-uq.tsv.gz",
    sep="\t",
    index_col=0,
).transpose()
# Keep only the first 15 characters of every row label.
fpkm.index = fpkm.index.str.slice(stop=15)

In [None]:
# Annotation table keyed by the `gencode_id_gex` column.
gencode = pd.read_csv(eac_tcga_dir / "gencode_mapping_table.csv").set_index("gencode_id_gex")

# Expression columns that have an entry in the annotation table.
annotated_ids = fpkm.columns.intersection(gencode.index)
mapping_gen = gencode.loc[annotated_ids].gene_name.to_dict()

# Restrict to annotated columns, relabel them with `gene_name`, then keep the
# first column for every repeated label.
# NOTE(review): re-running this cell after the rename finds no annotated ids
# any more and empties `fpkm`; reload `fpkm` first.
fpkm = fpkm.loc[:, annotated_ids].rename(columns=mapping_gen)
fpkm = fpkm.loc[:, ~fpkm.columns.duplicated()]

In [None]:
# GISTIC table, transposed so that the file's columns become rows; row labels
# are truncated to their first 15 characters.
cnvs = pd.read_csv(eac_tcga_dir / "TCGA-ESCA.gistic.tsv.gz", sep="\t", index_col=0).T
cnvs.index = cnvs.index.str[:15]

# Build the id -> gene_name mapping from the CNV columns themselves. The
# mapping built for the expression table only covers ids present there, so any
# id found only in the CNV table would otherwise keep its original label.
cnv_ids = cnvs.columns.intersection(gencode.index)
cnv_mapping = gencode.loc[cnv_ids].gene_name.to_dict()
cnvs = cnvs.loc[:, cnv_ids].rename(columns=cnv_mapping)
# Keep the first column for every repeated gene name.
cnvs = cnvs.loc[:, ~cnvs.columns.duplicated()]

In [None]:
# Gene-level mutation table, transposed so that the file's columns become rows.
mutations = pd.read_csv(
    eac_tcga_dir / "mc3_gene_level_ESCA_mc3_gene_level.txt.gz",
    sep="\t",
    index_col=0,
).transpose()

In [None]:
mutect2 = pd.read_csv(
    eac_tcga_dir / "TCGA-ESCA.mutect2_snv.tsv.gz",
    sep="\t",
    index_col=0,
)
# Keep calls whose effect does not mention "synonymous_variant" and whose
# filter column is exactly "PASS".
keep_call = (~mutect2.effect.str.contains("synonymous_variant")) & (mutect2["filter"] == "PASS")
mutect2 = mutect2[keep_call]
# Drop the last character of every row label.
mutect2.index = mutect2.index.str[:-1]
# Count of non-null `gene` entries per row label, divided by 38.
# NOTE(review): 38 is presumably the assumed exome size in Mb - confirm.
tmb = (mutect2.groupby(by=mutect2.index).count()["gene"] / 38).rename("TMB")

In [None]:
# Rows of the clinical matrix annotated as adenocarcinoma ...
is_eac = clinical_full["histological_type"] == "Esophagus Adenocarcinoma, NOS"
# ... restricted to labels that also appear in the expression table.
eac_patients = fpkm.index.intersection(clinical_full.loc[is_eac].index)

In [None]:
# Split the cohort by the trailing "-11" code of the 15-character barcode.
# Matching only the suffix (rather than "-11" anywhere in the label) prevents a
# barcode whose earlier fields happen to contain "-11" from being removed.
# NOTE(review): this cell overwrites `eac_patients`; running it twice leaves
# `normal_patients` empty.
is_normal = eac_patients.str.endswith("-11")
normal_patients = eac_patients[is_normal]
eac_patients = eac_patients[~is_normal]

In [None]:
# Expression and clinical tables are indexed directly with the cohort labels.
fpkm_eac = fpkm.loc[eac_patients]
clinical_eac = clinical_full.loc[eac_patients]


def _restrict_to_eac(table):
    """Return the rows of ``table`` whose label is also in ``eac_patients``."""
    return table.loc[table.index.intersection(eac_patients)]


# The remaining tables are restricted to the labels they share with the cohort.
cnvs_eac = _restrict_to_eac(cnvs)
mut_eac = _restrict_to_eac(mutations)
tmb_eac = _restrict_to_eac(tmb)
hrd_score = _restrict_to_eac(hrd_score)
immune_subtype = _restrict_to_eac(immune_subtype)

In [None]:
# Column-wise z-score: subtract each column's mean and divide by its standard
# deviation.
std_fpkm = fpkm_eac.sub(fpkm_eac.mean()).div(fpkm_eac.std())

In [None]:
signature_dir = pl.Path("/cluster/work/boeva/jyates/EAC_singlecell/upd_clean_code/signatures_canceronly")

# Map each signature file's stem to the values of its "0" column.
# The paths are sorted so that the dictionary order (and therefore the column
# order of anything derived from it) does not depend on the arbitrary order in
# which the file system lists the directory. Non-file entries are skipped.
full_sigs = {}
for sig_path in sorted(signature_dir.iterdir()):
    if not sig_path.is_file():
        continue
    signature_genes = pd.read_csv(sig_path, index_col=0).set_index("0").index
    # to_numpy() yields a NumPy array under every pandas version, whereas
    # Index.ravel() returns an array or an Index depending on the version.
    full_sigs[sig_path.stem] = signature_genes.to_numpy()

In [None]:
caf_sig_dir = pl.Path("/cluster/work/boeva/jyates/EAC_singlecell/upd_clean_code/marker_genes/fibroblast/")

# One signature per file: drop names starting with "MT-", "RPS" or "RPL",
# then keep the names of the first 100 remaining rows.
caf_sigs = {}
for sig in caf_sig_dir.iterdir():
    markers = pd.read_csv(sig, index_col=0).set_index("names")
    has_excluded_prefix = markers.index.str.startswith(("MT-","RPS","RPL"))
    caf_sigs[sig.stem] = markers[~has_excluded_prefix].head(100).index.ravel()

In [None]:
myeloid_sig_dir = pl.Path("/cluster/work/boeva/jyates/EAC_singlecell/upd_clean_code/marker_genes/myeloid/")

# One signature per file: drop names starting with "MT-", "RPS" or "RPL",
# then keep the names of the first 100 remaining rows.
myeloid_sigs = {}
for sig in myeloid_sig_dir.iterdir():
    markers = pd.read_csv(sig, index_col=0).set_index("names")
    has_excluded_prefix = markers.index.str.startswith(("MT-","RPS","RPL"))
    myeloid_sigs[sig.stem] = markers[~has_excluded_prefix].head(100).index.ravel()

In [None]:
# Alias (not a copy) of the full signature dictionary; this is the name the
# ssGSEA call for the state scores reads its gene sets from.
red_sigs = full_sigs

In [None]:
import gseapy as gp

# ssGSEA of the signature gene sets on the transposed expression table.
ss = gp.ssgsea(
    data=fpkm_eac.transpose(),
    gene_sets=red_sigs.copy(),
    sample_norm_method='rank',  # per the original note, 'custom' would use the raw values of `data`
    outdir=None,
    no_plot=True,
)

# Transposed result table.
state_score = ss.res2d.transpose()

In [None]:
# ssGSEA of the fibroblast gene sets on the transposed expression table.
ss = gp.ssgsea(
    data=fpkm_eac.transpose(),
    gene_sets=caf_sigs.copy(),
    sample_norm_method='rank',  # per the original note, 'custom' would use the raw values of `data`
    outdir=None,
    no_plot=True,
)

# Transposed result table.
caf_score = ss.res2d.transpose()

In [None]:
# ssGSEA of the myeloid gene sets on the transposed expression table.
ss = gp.ssgsea(
    data=fpkm_eac.transpose(),
    gene_sets=myeloid_sigs.copy(),
    sample_norm_method='rank',  # per the original note, 'custom' would use the raw values of `data`
    outdir=None,
    no_plot=True,
)

# Transposed result table.
myeloid_score = ss.res2d.transpose()

In [None]:
# NOTE(review): `df` and `common_patients` are not read again in this cell;
# they are kept only because other cells may rely on them.
df = state_score.copy()
df.index = df.index.str[:-3]

common_patients = df.index.intersection(clinical_nature.index)

# Score tables and clinical annotations joined on their shared row labels,
# then the TMB series is attached with an outer join.
augclin = pd.concat([state_score, caf_score, myeloid_score, clinical_eac], axis=1, join="inner")
augclin = pd.concat([augclin, tmb_eac], axis=1, join='outer')

# Collapse pathologic sub-stages into numeric stages 1-4; unknown or
# discrepant entries become missing.
stage_mapping = {"Stage X": np.nan, "Stage IV": 4, "Stage IVB": 4, "Stage IVA": 4, "Stage IIB": 2, "Stage IA": 1, "Stage IIIA": 3, "Stage IIA": 2, "Stage IIC": 2, 
                 "Stage I": 1, "Stage IIIC": 3, "Stage IB": 1, "Stage IIIB": 3, 'Stage III': 3, "Stage II": 2, "I/II NOS": 1,
                 "[Discrepancy]": np.nan}

# The last dash-separated field of the 15-character label gives the sample
# type code. Values are assigned positionally, hence the conversion to a plain
# array (Series.to_numpy replaces the deprecated Series.ravel).
sample_type_code = pd.Series(augclin.index.str[:15].str.split("-").str[-1])
augclin["Disease"] = sample_type_code.replace({"01": "Tumor", "11": "Normal", "06": "Tumor"}).to_numpy()

augclin["Stage"] = augclin.pathologic_stage.replace(stage_mapping).astype("category")

# Partial, stable and complete responses are pooled into one category.
augclin["Response to treatment"] = augclin["additional_treatment_completion_success_outcome"].replace({"Partial Response": "Response/Stable", 
                                                                    "Stable Disease": "Response/Stable", 
                                                                    "Complete Response": "Response/Stable"})

# For M/N/T: use the pathologic value, fall back to the clinical one where it
# is missing, then merge sub-categories and blank out the "X" entries.
augclin["M_status"]  = augclin["pathologic_M"].fillna(augclin["clinical_M"]).replace({"MX": np.nan, "M1a": "M1", "M1b": "M1"})

augclin["N_status"] = augclin["pathologic_N"].fillna(augclin["clinical_N"]).replace({"NX": np.nan, "N2": "N1/N2/N3", "N1": "N1/N2/N3", "N3": "N1/N2/N3"})

augclin["T_status"] = augclin["pathologic_T"].fillna(augclin["clinical_T"]).replace({"TX": np.nan, "T0": np.nan, 
                                                                                     "T3": "T3/T4", 
                                                                                     "T4": "T3/T4",
                                                                                     "T4a": "T3/T4", })

augclin["MSI_status"] = augclin["CDE_ID_3226963"].replace({"MSI-H": "MSI", "MSI-L": "MSI"})
augclin["Histological subtype"] = augclin["histological_type"].replace({"Esophagus Adenocarcinoma, NOS": "EAC", 
                                                                     "Esophagus Squamous Cell Carcinoma": "ESCC"})

# HRD is missing unless the sample appears in `hrd_score`.
augclin["HRD"] = np.nan
augclin.loc[hrd_score.index,"HRD"] = hrd_score["HRD"] 

# Samples without an immune subtype keep the literal string "None".
augclin["Immune subtype"] = "None"
augclin.loc[immune_subtype.index,"Immune subtype"] = immune_subtype["Subtype_Immune_Model_Based"]

In [None]:
def plot_box(ax, augclin, x, y, pairs, order=None):
    """Draw a box plot of ``y`` by ``x`` on ``ax`` and annotate ``pairs``.

    The box plot and the annotator receive the same data, columns and
    category ``order``. Each pair in ``pairs`` is compared with a
    Mann-Whitney test and the result is drawn inside the axes in the
    'simple' text format without the test name.
    """
    shared_args = dict(data=augclin, x=x, y=y, order=order)

    sns.boxplot(ax=ax, **shared_args)
    pretty_ax(ax)

    annotator = Annotator(ax, pairs, **shared_args)
    annotator.configure(
        test='Mann-Whitney',
        text_format='simple',
        show_test_name=False,
        loc='inside',
    )
    annotator.apply_and_annotate()

In [None]:
# Long-format table: one row per (score column, augclin row), annotated with
# that row's N status.
programs = ["cNMF_1","cNMF_2","cNMF_3","cNMF_4","cNMF_5"]
df = augclin[programs].unstack(-1).reset_index()
df["N status"] = df["level_1"].replace(augclin["N_status"].to_dict())
df = df.set_index("level_1")
df.columns = ["Program", "Score", "N status"]

x, y, hue = "Program", "Score", "N status"
hue_order = ["N0", "N1/N2/N3"]
# Within each program, compare every unordered pair of hue categories.
pairs = [
    [(prog, c1), (prog, c2)]
    for prog in programs
    for c1, c2 in itertools.combinations(hue_order, 2)
]

fig, ax = plt.subplots(1, 1, figsize=(5,3))
sns.boxplot(data=df, x=x, y=y, hue=hue, hue_order=hue_order, ax=ax)
ax.set_ylabel("ssGSEA score")
ax.set_xticks(ax.get_xticks(), ["cNMF$_{1}$","cNMF$_{2}$","cNMF$_{3}$","cNMF$_{4}$","cNMF$_{5}$"])
ax.legend(frameon=False, bbox_to_anchor=(1,1,0,0), title=hue)

annotator = Annotator(ax, pairs, data=df, x=x, y=y, hue=hue, hue_order=hue_order)
annotator.configure(test='Mann-Whitney', text_format='simple', show_test_name=False, loc='inside')
annotator.apply_and_annotate()
pretty_ax(ax)
fig.savefig("figures/tcga/cNMF_link_N_status.svg", dpi=200, bbox_inches="tight")

In [None]:
# Long-format table: one row per (score column, augclin row), annotated with
# that row's M status.
programs = ["cNMF_1","cNMF_2","cNMF_3","cNMF_4","cNMF_5"]
df = augclin[programs].unstack(-1).reset_index()
df["M status"] = df["level_1"].replace(augclin["M_status"].to_dict())
df = df.set_index("level_1")
df.columns = ["Program", "Score", "M status"]

x, y, hue = "Program", "Score", "M status"
hue_order = ["M0", "M1"]
# Within each program, compare every unordered pair of hue categories.
pairs = [
    [(prog, c1), (prog, c2)]
    for prog in programs
    for c1, c2 in itertools.combinations(hue_order, 2)
]

fig, ax = plt.subplots(1, 1, figsize=(5,3))
sns.boxplot(data=df, x=x, y=y, hue=hue, hue_order=hue_order, ax=ax)
ax.set_ylabel("ssGSEA score")
ax.set_xticks(ax.get_xticks(), ["cNMF$_{1}$","cNMF$_{2}$","cNMF$_{3}$","cNMF$_{4}$","cNMF$_{5}$"])
ax.legend(frameon=False, bbox_to_anchor=(1,1,0,0), title=hue)

annotator = Annotator(ax, pairs, data=df, x=x, y=y, hue=hue, hue_order=hue_order)
annotator.configure(test='Mann-Whitney', text_format='simple', show_test_name=False, loc='inside')
annotator.apply_and_annotate()
pretty_ax(ax)
fig.savefig("figures/tcga/cNMF_link_M_status.svg", dpi=200, bbox_inches="tight")

In [None]:
# Long-format table: one row per (score column, augclin row), annotated with
# that row's T status.
programs = ['Adipose CAF', 'Fibroblast', 'HGF-CAF', 'Inflammatory CAF']
df = augclin[programs].unstack(-1).reset_index()
df["T status"] = df["level_1"].replace(augclin["T_status"].to_dict())
df = df.set_index("level_1")
df.columns = ["Program", "Score", "T status"]

x, y, hue = "Program", "Score", "T status"
hue_order = ["T1", "T2", "T3/T4"]
# Within each program, compare every unordered pair of hue categories.
pairs = [
    [(prog, c1), (prog, c2)]
    for prog in programs
    for c1, c2 in itertools.combinations(hue_order, 2)
]

fig, ax = plt.subplots(1, 1, figsize=(5,3))
sns.boxplot(data=df, x=x, y=y, hue=hue, hue_order=hue_order, ax=ax)
ax.set_ylabel("ssGSEA score")
ax.legend(frameon=False, bbox_to_anchor=(1,1,0,0), title=hue)

annotator = Annotator(ax, pairs, data=df, x=x, y=y, hue=hue, hue_order=hue_order)
annotator.configure(test='Mann-Whitney', text_format='simple', show_test_name=False, loc='inside')
annotator.apply_and_annotate()
pretty_ax(ax)
fig.savefig("figures/tcga/fibroblast_link_T_status.svg", dpi=200, bbox_inches="tight")

In [None]:
# Long-format table: one row per (score column, augclin row), annotated with
# that row's N status.
programs = ['Adipose CAF', 'Fibroblast', 'HGF-CAF', 'Inflammatory CAF']
df = augclin[programs].unstack(-1).reset_index()
df["N status"] = df["level_1"].replace(augclin["N_status"].to_dict())
df = df.set_index("level_1")
df.columns = ["Program", "Score", "N status"]

x, y, hue = "Program", "Score", "N status"
hue_order = ["N0", "N1/N2/N3"]
# Within each program, compare every unordered pair of hue categories.
pairs = [
    [(prog, c1), (prog, c2)]
    for prog in programs
    for c1, c2 in itertools.combinations(hue_order, 2)
]

fig, ax = plt.subplots(1, 1, figsize=(5,3))
sns.boxplot(data=df, x=x, y=y, hue=hue, hue_order=hue_order, ax=ax)
ax.set_ylabel("ssGSEA score")
ax.legend(frameon=False, bbox_to_anchor=(1,1,0,0), title=hue)

annotator = Annotator(ax, pairs, data=df, x=x, y=y, hue=hue, hue_order=hue_order)
annotator.configure(test='Mann-Whitney', text_format='simple', show_test_name=False, loc='inside')
annotator.apply_and_annotate()
pretty_ax(ax)
fig.savefig("figures/tcga/fibroblast_link_N_status.svg", dpi=200, bbox_inches="tight")

In [None]:
# Long-format table: one row per (score column, augclin row), annotated with
# that row's T status.
programs = ['DC', 'Kupffer cells', 'Mast', 'TAM1', 'TAM2']
df = augclin[programs].unstack(-1).reset_index()
df["T status"] = df["level_1"].replace(augclin["T_status"].to_dict())
df = df.set_index("level_1")
df.columns = ["Program", "Score", "T status"]

x, y, hue = "Program", "Score", "T status"
hue_order = ["T1", "T2", "T3/T4"]
# Within each program, compare every unordered pair of hue categories.
pairs = [
    [(prog, c1), (prog, c2)]
    for prog in programs
    for c1, c2 in itertools.combinations(hue_order, 2)
]

fig, ax = plt.subplots(1, 1, figsize=(5,3))
sns.boxplot(data=df, x=x, y=y, hue=hue, hue_order=hue_order, ax=ax)
ax.set_ylabel("ssGSEA score")
ax.legend(frameon=False, bbox_to_anchor=(1,1,0,0), title=hue)

annotator = Annotator(ax, pairs, data=df, x=x, y=y, hue=hue, hue_order=hue_order)
annotator.configure(test='Mann-Whitney', text_format='simple', show_test_name=False, loc='inside')
annotator.apply_and_annotate()
pretty_ax(ax)
fig.savefig("figures/tcga/myeloid_link_T_status.svg", dpi=200, bbox_inches="tight")

In [None]:
# Long-format table: one row per (score column, augclin row), annotated with
# that row's N status.
programs = ['DC', 'Kupffer cells', 'Mast', 'TAM1', 'TAM2']
df = augclin[programs].unstack(-1).reset_index()
df["N status"] = df["level_1"].replace(augclin["N_status"].to_dict())
df = df.set_index("level_1")
df.columns = ["Program", "Score", "N status"]

x, y, hue = "Program", "Score", "N status"
hue_order = ["N0", "N1/N2/N3"]
# Within each program, compare every unordered pair of hue categories.
pairs = [
    [(prog, c1), (prog, c2)]
    for prog in programs
    for c1, c2 in itertools.combinations(hue_order, 2)
]

fig, ax = plt.subplots(1, 1, figsize=(5,3))
sns.boxplot(data=df, x=x, y=y, hue=hue, hue_order=hue_order, ax=ax)
ax.set_ylabel("ssGSEA score")
ax.legend(frameon=False, bbox_to_anchor=(1,1,0,0), title=hue)

annotator = Annotator(ax, pairs, data=df, x=x, y=y, hue=hue, hue_order=hue_order)
annotator.configure(test='Mann-Whitney', text_format='simple', show_test_name=False, loc='inside')
annotator.apply_and_annotate()
pretty_ax(ax)
fig.savefig("figures/tcga/myeloid_link_N_status.svg", dpi=200, bbox_inches="tight")

In [None]:
# One panel per cNMF program: score distribution across immune subtypes, with
# the "Inflammatory (Immune C3)" group compared against each other subtype.
immune_order = ["Wound Healing (Immune C1)","IFN-gamma Dominant (Immune C2)",
                "Inflammatory (Immune C3)","Lymphocyte Depleted (Immune C4)"]
immune_pairs = [("Inflammatory (Immune C3)","IFN-gamma Dominant (Immune C2)"),
                ("Wound Healing (Immune C1)","Inflammatory (Immune C3)"),
                ("Inflammatory (Immune C3)","Lymphocyte Depleted (Immune C4)")]

fig, ax = plt.subplots(1, 5, figsize=(20,4))
flatax = ax.flatten()
for i, state in enumerate(f"cNMF_{k}" for k in range(1, 6)):
    panel = flatax[i]
    # Fresh list copies per call, as the original built new lists each time.
    plot_box(panel, augclin, "Immune subtype", state,
             list(immune_pairs), order=list(immune_order))
    panel.set_xlabel("")
    panel.set_ylabel(f"{state} score")
    panel.set_xticks(panel.get_xticks(), panel.get_xticklabels(), rotation=45, ha="right")
fig.tight_layout()
#fig.savefig("/cluster/work/boeva/jyates/EAC_singlecell/clean_code/figures/tcga/cNMF_4_N_status.svg", dpi=200, bbox_inches="tight")

In [None]:
# Bin the HRD score into the four labelled ranges.
# `include_lowest=True` keeps a score of exactly 0 in the "<20" bin (the
# default right-closed bins would turn it into a missing value), and the open
# upper edge keeps any score above 100 in ">40" instead of dropping it.
augclin["HRD cat"] = pd.cut(
    augclin.HRD,
    bins=[0, 20, 30, 40, np.inf],
    labels=["<20", "20-30", "30-40", ">40"],
    include_lowest=True,
)

In [None]:
# One panel per cNMF program: score distribution across HRD categories, with
# all six pairwise category comparisons annotated.
hrd_order = ["<20","20-30","30-40",">40"]
hrd_pairs = [("<20","20-30"),("<20","30-40"),("<20",">40"),("20-30","30-40"),(">40","20-30"),(">40","30-40")]

fig, ax = plt.subplots(2, 3, figsize=(7,5))
flatax = ax.flatten()
for i, state in enumerate(f"cNMF_{k}" for k in range(1, 6)):
    panel = flatax[i]
    # Fresh list copies per call, as the original built new lists each time.
    plot_box(panel, augclin, "HRD cat", state, list(hrd_pairs), order=list(hrd_order))
    panel.set_xlabel("")
    panel.set_ylabel(f"{state} score")
# The grid has six slots for five programs; blank the unused last one.
flatax[-1].axis("off")
fig.tight_layout()
fig.savefig("figures/tcga/cNMF_link_HRD_score.svg", dpi=200, bbox_inches="tight")

In [None]:
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as mcolors

from sklearn.decomposition import PCA

# Two-component PCA of the standardised expression table (missing values set
# to 0). The seed makes the projection reproducible if a randomised solver is
# selected.
pca = PCA(n_components=2, random_state=0)
X_pca = pd.DataFrame(pca.fit_transform(std_fpkm.fillna(0)), index=std_fpkm.index, columns=["PC1","PC2"])

# Attach the five program scores, selecting them by name so that the pretty
# labels cannot be assigned to the wrong column when `state_score` has a
# different column order or additional columns.
program_labels = {f"cNMF_{k}": f"cNMF$_{{{k}}}$" for k in range(1, 6)}
X_pca = pd.concat(
    [X_pca, state_score[list(program_labels)].rename(columns=program_labels)],
    axis=1,
)

def plot_pcs_color(ax, state):
    """Scatter PC1 against PC2 on ``ax``, coloured by the ``state`` column.

    Reads the module-level ``X_pca`` table. Colours use the 'RdBu_r' colormap
    with a two-slope normalisation centred on the column's median and spanning
    its minimum and maximum. A colorbar is added to the figure that owns
    ``ax``, the panel is titled with ``state`` and the shared axis styling is
    applied.
    """
    values = X_pca[state]
    normalize = mcolors.TwoSlopeNorm(
        vcenter=values.median(),
        vmin=values.min(),
        vmax=values.max(),
    )
    colormap = matplotlib.colormaps['RdBu_r']
    sns.scatterplot(
        y=X_pca["PC2"],
        x=X_pca["PC1"],
        c=values,
        norm=normalize,
        cmap=colormap,
        ax=ax
    )
    scalar_mappable = cm.ScalarMappable(norm=normalize, cmap=colormap)
    scalar_mappable.set_array(values)
    ax.set_title(state)
    # Use the figure that owns `ax` rather than a global `fig`, so the function
    # works regardless of which figure variable exists when it is called.
    ax.figure.colorbar(scalar_mappable, ax=ax)
    pretty_ax(ax)

# One PCA panel per program, each coloured by that program's score.
fig, ax = plt.subplots(1, 5, figsize=(15,2))
flatax = ax.flatten()

panel_labels = ["cNMF$_{1}$", "cNMF$_{2}$", "cNMF$_{3}$", "cNMF$_{4}$", "cNMF$_{5}$"]
for panel, label in zip(flatax, panel_labels):
    plot_pcs_color(panel, label)

fig.tight_layout()
fig.savefig("figures/tcga/pca_harmony.png", dpi=200, bbox_inches="tight")

In [None]:
# Pairwise correlations between all columns of X_pca, rounded to two decimals.
X_pca.corr().round(2)

# Survival

In [None]:
from lifelines import KaplanMeierFitter
from lifelines.statistics import logrank_test

In [None]:
# Survival table; the four event columns are read as floats.
survival = pd.read_csv(
    eac_tcga_dir / "Survival_SupplementalTable_S1_20171025_xena_sp",
    sep="\t",
    index_col=0,
    dtype={"DSS": float, "OS": float, "DFI": float, "PFI": float},
)

augclin["age"] = augclin["age_at_initial_pathologic_diagnosis"]

# Endpoint used below: event column `survstr`, duration column `<survstr>.time`.
survstr = "DSS"
endpoint_cols = [survstr, f"{survstr}.time"]
clin = pd.concat(
    [augclin[["age","Stage","gender"]], survival.loc[augclin.index, endpoint_cols]],
    axis=1,
    join="inner",
)
# NOTE(review): missing event indicators are filled with the column median,
# which assigns an event status that was never observed - confirm this is
# intended rather than dropping those rows.
clin[survstr] = clin[survstr].fillna(clin[survstr].median())
# Min-max scale age to [0, 1] and encode gender as 0/1.
age_min, age_max = clin.age.min(), clin.age.max()
clin["age"] = (clin.age - age_min)/(age_max - age_min)
clin["gender"] = clin.gender.replace({"MALE": 0, "FEMALE": 1})

# Align the expression and clinical tables on their shared rows.
common_patients = fpkm_eac.index.intersection(clin.index)
std_fpkm = std_fpkm.loc[common_patients]
clin = clin.loc[common_patients]

In [None]:
augclin["age"] = augclin["age_at_initial_pathologic_diagnosis"]

# Endpoint: event column `survstr`, duration column `<survstr>.time`.
survstr = "DSS"
clin = pd.concat([augclin[["age","Stage","gender"]],survival.loc[augclin.index,[survstr,f"{survstr}.time"]]],axis=1,join="inner")
# NOTE(review): missing event indicators are filled with the column median,
# which assigns an event status that was never observed - confirm intent.
clin[survstr] = clin[survstr].fillna(clin[survstr].median())
clin.age = (clin.age - clin.age.min())/(clin.age.max()- clin.age.min())
clin.gender = clin.gender.replace({"MALE": 0, "FEMALE": 1})

common_patients = fpkm_eac.index.intersection(clin.index)
std_fpkm = std_fpkm.loc[common_patients]
clin = clin.loc[common_patients]

# One Cox model per program, with the z-scored program score as the only
# covariate (age, Stage and gender are prepared above but not passed in).
cox_summaries = []
for state in ["cNMF_1","cNMF_2","cNMF_3","cNMF_4","cNMF_5"]: 

    expr = state_score[state]
    expr = (expr - expr.mean())/expr.std()
    cox_clin = pd.concat([clin[[survstr,f"{survstr}.time",]],expr],axis=1)
    
    cph = CoxPHFitter()
    cph.fit(cox_clin, duration_col=f"{survstr}.time", event_col=survstr)
    summ = cph.summary
    # Currently a no-op: the model has no "age"/"gender" rows to rename.
    summ = summ.rename(index={"age": f"{state}_age", "gender": f"{state}_gender"})
    cox_summaries.append(summ)

cox_results = pd.concat(cox_summaries)

# Series.to_numpy replaces the deprecated Series.ravel.
ps = cox_results.loc[:,"p"].to_numpy()
x = cox_results.loc[:,"exp(coef)"].to_numpy()
lower = cox_results.loc[:,"exp(coef) lower 95%"].to_numpy()
upper = cox_results.loc[:,"exp(coef) upper 95%"].to_numpy()
# Asymmetric error-bar lengths around the hazard ratio.
ci = [x - lower, upper - x]
names = cox_results.index
# Red when the whole interval lies above 1, blue when it lies below 1,
# black when it contains 1.
colorlist = ["red" if (lo > 1 and hi > 1) else ("blue" if (lo < 1 and hi < 1) else "black")
             for lo, hi in zip(lower, upper)]

# Forest plot: first program at the top, reference line at a hazard ratio of 1.
fig, ax = plt.subplots(1,1,figsize=(3,3))
ax.errorbar(x,np.arange(0,len(x))[::-1],
            xerr=ci, marker="s",
            linewidth=0,
            elinewidth=2,
            ecolor=colorlist,
            markerfacecolor="black",
            markeredgecolor="black")
ax.spines[["bottom","left"]].set_linewidth(2)
ax.spines[["top","right"]].set_visible(False)
ax.vlines(1,ymin=ax.get_ylim()[0],ymax=ax.get_ylim()[1],linestyle="--",color="grey")
ax.set_yticks(np.arange(0,len(x)))
ax.set_yticklabels(["cNMF$_{1}$","cNMF$_{2}$","cNMF$_{3}$","cNMF$_{4}$","cNMF$_{5}$"][::-1])
# p-values are written at the right edge, in the same bottom-to-top order.
for i,p in enumerate(ps[::-1]):
    ax.text(ax.get_xlim()[1], i, f"p={p:.2e}")

fig.savefig("figures/tcga/survival_cox_plot.svg", dpi=200, bbox_inches="tight")

In [None]:
augclin["age"] = augclin["age_at_initial_pathologic_diagnosis"]

# Endpoint: event column `survstr`, duration column `<survstr>.time`.
survstr = "DSS"
clin = pd.concat([augclin[["age","Stage","gender"]],survival.loc[augclin.index,[survstr,f"{survstr}.time"]]],axis=1,join="inner")
# NOTE(review): missing event indicators are filled with the column median,
# which assigns an event status that was never observed - confirm intent.
clin[survstr] = clin[survstr].fillna(clin[survstr].median())
clin.age = (clin.age - clin.age.min())/(clin.age.max()- clin.age.min())
clin.gender = clin.gender.replace({"MALE": 0, "FEMALE": 1})

common_patients = fpkm_eac.index.intersection(clin.index)
std_fpkm = std_fpkm.loc[common_patients]
clin = clin.loc[common_patients]

# One Cox model per fibroblast signature, with the z-scored signature score as
# the only covariate (age, Stage and gender are prepared but not passed in).
cox_summaries = []
for state in ["Inflammatory CAF", "Adipose CAF", "HGF-CAF", "Fibroblast"]: 

    expr = caf_score[state]
    expr = (expr - expr.mean())/expr.std()
    cox_clin = pd.concat([clin[[survstr,f"{survstr}.time",]],expr],axis=1)
    
    cph = CoxPHFitter()
    cph.fit(cox_clin, duration_col=f"{survstr}.time", event_col=survstr)
    summ = cph.summary
    # Currently a no-op: the model has no "age"/"gender" rows to rename.
    summ = summ.rename(index={"age": f"{state}_age", "gender": f"{state}_gender"})
    cox_summaries.append(summ)

cox_results = pd.concat(cox_summaries)

# Series.to_numpy replaces the deprecated Series.ravel.
ps = cox_results.loc[:,"p"].to_numpy()
x = cox_results.loc[:,"exp(coef)"].to_numpy()
lower = cox_results.loc[:,"exp(coef) lower 95%"].to_numpy()
upper = cox_results.loc[:,"exp(coef) upper 95%"].to_numpy()
# Asymmetric error-bar lengths around the hazard ratio.
ci = [x - lower, upper - x]
names = cox_results.index
# Red when the whole interval lies above 1, blue when it lies below 1,
# black when it contains 1.
colorlist = ["red" if (lo > 1 and hi > 1) else ("blue" if (lo < 1 and hi < 1) else "black")
             for lo, hi in zip(lower, upper)]

# Forest plot: first signature at the top, reference line at a hazard ratio of 1.
fig, ax = plt.subplots(1,1,figsize=(3,3))
ax.errorbar(x,np.arange(0,len(x))[::-1],
            xerr=ci, marker="s",
            linewidth=0,
            elinewidth=2,
            ecolor=colorlist,
            markerfacecolor="black",
            markeredgecolor="black")
ax.spines[["bottom","left"]].set_linewidth(2)
ax.spines[["top","right"]].set_visible(False)
ax.vlines(1,ymin=ax.get_ylim()[0],ymax=ax.get_ylim()[1],linestyle="--",color="grey")
ax.set_yticks(np.arange(0,len(x)))
ax.set_yticklabels(names[::-1])
# p-values are written at the right edge, in the same bottom-to-top order.
for i,p in enumerate(ps[::-1]):
    ax.text(ax.get_xlim()[1], i, f"p={p:.2e}")

In [None]:
augclin["age"] = augclin["age_at_initial_pathologic_diagnosis"]
#survstr = "OS"
# Endpoint: event column `survstr`, duration column `<survstr>.time`.
survstr = "DSS"
clin = pd.concat([augclin[["age","Stage","gender"]],survival.loc[augclin.index,[survstr,f"{survstr}.time"]]],axis=1,join="inner")
# NOTE(review): missing event indicators are filled with the column median,
# which assigns an event status that was never observed - confirm intent.
clin[survstr] = clin[survstr].fillna(clin[survstr].median())
clin.age = (clin.age - clin.age.min())/(clin.age.max()- clin.age.min())
clin.gender = clin.gender.replace({"MALE": 0, "FEMALE": 1})

common_patients = fpkm_eac.index.intersection(clin.index)
std_fpkm = std_fpkm.loc[common_patients]
clin = clin.loc[common_patients]

# One Cox model per myeloid signature, with the z-scored signature score as
# the only covariate (age, Stage and gender are prepared but not passed in).
cox_summaries = []
for state in ['DC', 'Kupffer cells',
       'Mast', 'TAM1', 'TAM2']: 

    expr = myeloid_score[state]
    expr = (expr - expr.mean())/expr.std()
    cox_clin = pd.concat([clin[[survstr,f"{survstr}.time",]],expr],axis=1)
    
    cph = CoxPHFitter()
    cph.fit(cox_clin, duration_col=f"{survstr}.time", event_col=survstr)
    summ = cph.summary
    # Currently a no-op: the model has no "age"/"gender" rows to rename.
    summ = summ.rename(index={"age": f"{state}_age", "gender": f"{state}_gender"})
    cox_summaries.append(summ)

cox_results = pd.concat(cox_summaries)

# Series.to_numpy replaces the deprecated Series.ravel.
ps = cox_results.loc[:,"p"].to_numpy()
x = cox_results.loc[:,"exp(coef)"].to_numpy()
lower = cox_results.loc[:,"exp(coef) lower 95%"].to_numpy()
upper = cox_results.loc[:,"exp(coef) upper 95%"].to_numpy()
# Asymmetric error-bar lengths around the hazard ratio.
ci = [x - lower, upper - x]
names = cox_results.index
# Red when the whole interval lies above 1, blue when it lies below 1,
# black when it contains 1.
colorlist = ["red" if (lo > 1 and hi > 1) else ("blue" if (lo < 1 and hi < 1) else "black")
             for lo, hi in zip(lower, upper)]

# Forest plot: first signature at the top, reference line at a hazard ratio of 1.
fig, ax = plt.subplots(1,1,figsize=(3,3))
ax.errorbar(x,np.arange(0,len(x))[::-1],
            xerr=ci, marker="s",
            linewidth=0,
            elinewidth=2,
            ecolor=colorlist,
            markerfacecolor="black",
            markeredgecolor="black")
ax.spines[["bottom","left"]].set_linewidth(2)
ax.spines[["top","right"]].set_visible(False)
ax.vlines(1,ymin=ax.get_ylim()[0],ymax=ax.get_ylim()[1],linestyle="--",color="grey")
ax.set_yticks(np.arange(0,len(x)))
ax.set_yticklabels(names[::-1])
# p-values are written at the right edge, in the same bottom-to-top order.
for i,p in enumerate(ps[::-1]):
    ax.text(ax.get_xlim()[1], i, f"p={p:.2e}")