In [1]:
import pandas as pd
import numpy as np
import pathlib as pl

import matplotlib.pyplot as plt
import seaborn as sns

import scipy

from sklearn.preprocessing import StandardScaler

from statannotations.Annotator import Annotator

In [None]:
def pretty_ax(ax):
    """Apply the notebook's minimal axis styling to ``ax`` in place.

    Hides the right and top spines, sets the bottom and left spines to a
    line width of 1.5, and configures ticks on both axes so that bottom ticks
    are on, top and left ticks are off, and the bottom/left tick labels are
    shown.

    Parameters
    ----------
    ax
        Axes-like object exposing ``spines`` and ``tick_params``.
    """
    for hidden_side in ("right", "top"):
        ax.spines[hidden_side].set_visible(False)

    tick_style = dict(
        axis="both",
        which="both",
        bottom=True,
        top=False,
        left=False,
        labelbottom=True,
        labelleft=True,
    )
    ax.tick_params(**tick_style)

    for kept_side in ("bottom", "left"):
        ax.spines[kept_side].set_linewidth(1.5)

In [None]:
def get_tpm(gencode_mapping: pd.DataFrame, bulk: pd.DataFrame) -> pd.DataFrame:
    """Convert a samples x genes count matrix to TPM.

    Parameters
    ----------
    gencode_mapping
        Table with ``gene_name``, ``start`` and ``end`` columns. When a gene
        name occurs more than once, the first occurrence is used.
    bulk
        Expression matrix with samples as rows and gene names as columns.

    Returns
    -------
    pd.DataFrame
        Same rows as ``bulk``, restricted to genes that have a positive length
        in ``gencode_mapping``; every row sums to 1e6.

    Notes
    -----
    Lengths are measured in bases rather than kilobases. This does not affect
    the result because each sample is rescaled by its own total.
    """
    by_gene = gencode_mapping.set_index("gene_name")
    # NOTE(review): end - start omits the +1 of inclusive coordinates —
    # confirm the coordinate convention of the mapping table.
    gene_length = by_gene["end"] - by_gene["start"]
    gene_length = gene_length[~gene_length.index.duplicated()]
    # A zero (or negative/missing) length would turn the division below into
    # inf/NaN and poison the per-sample scaling factor, so drop such genes.
    gene_length = gene_length[gene_length > 0]

    shared_genes = bulk.columns.intersection(gene_length.index)
    bulk = bulk.loc[:, shared_genes]
    gene_length = gene_length.loc[shared_genes]

    # Length-normalised expression (columns are aligned on gene name).
    full_rpk = bulk / gene_length

    # Per-sample scaling factor so that each row sums to one million.
    pm_factor = full_rpk.sum(axis=1) / 1000000

    return full_rpk.div(pm_factor, axis=0)

In [None]:
import os
# Create the output folder used by the savefig calls below; exist_ok=True makes re-running this cell safe.
os.makedirs("figures/ecotypes/", exist_ok=True)

In [None]:
# Directory holding the TCGA-ESCA input files read below (placeholder path — set it before running).
tcga_dir = pl.Path("/add/path/here/")

In [2]:
# Per-cohort fraction tables; their "tumor" column is plotted later as
# "BayesPrism Purity". The first CSV column becomes the sample index.
bp_eac = pd.read_csv("/add/path/here/eac_purity.csv",index_col=0)
# regex=False: "." must be replaced literally. Relying on the default is
# version-dependent (older pandas emits a FutureWarning here) and, read as a
# regular expression, "." would match every character.
# The trailing [:-1] drops the last character of each ID
# (presumably the vial letter of the TCGA barcode — confirm).
bp_eac.index = bp_eac.index.str.replace(".","-",regex=False).str[:-1]

bp_eac_gse = pd.read_csv("/add/path/here/eac_gse_purity.csv",index_col=0)

bp_eac_carroll = pd.read_csv("/add/path/here/eac_carroll_purity.csv",index_col=0)
bp_eac_carroll.index = bp_eac_carroll.index.str.replace(".","-",regex=False)

  bp_eac.index = bp_eac.index.str.replace(".","-").str[:-1]
  bp_eac_carroll.index = bp_eac_carroll.index.str.replace(".","-")


In [None]:
# Tab-separated purity table; its "purity" column is compared with bp_eac["tumor"] below.
purity = pd.read_csv("/add/path/here/TCGA_absolute_purity.txt",index_col=0,sep="\t") # for ESCA

In [None]:
# Align the two purity estimates on sample ID; dropna() keeps only samples present in both tables.
joint_purity = pd.concat([purity["purity"], bp_eac["tumor"]],axis=1).dropna()

In [None]:
from scipy.stats import pearsonr

# Pearson correlation between the two purity estimates.
# Series.ravel() is deprecated in recent pandas; to_numpy() gives the same 1-D array.
r, p = pearsonr(joint_purity["tumor"].to_numpy(), joint_purity["purity"].to_numpy())

fig, ax = plt.subplots(1,1,figsize=(3,2))
sns.regplot(data=joint_purity, y="tumor", x="purity",ax=ax)
pretty_ax(ax)
# The text position is given in data coordinates.
ax.text(0.75,0.1,f"R={r:.2f}\np={p:.1e}",fontsize=10)
ax.set_ylabel("BayesPrism Purity")
ax.set_xlabel("ABSOLUTE Purity")

In [None]:
# Clinical matrix indexed by its first column; "histological_type" is used below to pick the adenocarcinoma samples.
clinical_full = pd.read_csv(tcga_dir / "TCGA.ESCA.sampleMap_ESCA_clinicalMatrix", sep="\t", index_col=0)

In [None]:
# Expression table, transposed after loading (presumably to samples x genes — confirm file layout).
fpkm = pd.read_csv(tcga_dir / "TCGA-ESCA.htseq_fpkm-uq.tsv.gz",sep="\t",index_col=0).T
# Keep the first 15 characters of each sample ID so they can be intersected with the clinical index below.
fpkm.index = fpkm.index.str[:15]

In [None]:
# Translate the expression columns from gencode IDs to gene names.
gencode = pd.read_csv(tcga_dir /"gencode_mapping_table.csv").set_index('gencode_id_gex')

# IDs present both in the expression matrix and in the mapping table.
shared_ids = fpkm.columns.intersection(gencode.index)
mapping_gen = gencode.loc[shared_ids, "gene_name"].to_dict()

fpkm = fpkm.loc[:, shared_ids].rename(columns=mapping_gen)
# Several IDs can map to one gene name; keep the first column for each name.
fpkm = fpkm.loc[:, ~fpkm.columns.duplicated()]

In [None]:
# Samples annotated as esophageal adenocarcinoma that also have expression data.
eac_patients = clinical_full[clinical_full["histological_type"]=="Esophagus Adenocarcinoma, NOS"].index
eac_patients = fpkm.index.intersection(eac_patients)

# Split off the "-11" samples (presumably TCGA sample-type code 11, solid
# tissue normal — confirm). The code is tested as the trailing field only:
# a substring search would also catch "-11" inside the site or participant
# part of an ID and silently drop that tumour sample.
is_normal = eac_patients.str.endswith("-11")
normal_patients = eac_patients[is_normal]
eac_patients = eac_patients[~is_normal]

In [None]:
# Expression rows of the selected (non "-11") adenocarcinoma samples.
fpkm_eac = fpkm.loc[eac_patients]

In [None]:
# GSE207526 expression table: the first data row is skipped and the table is transposed
# (presumably to samples x genes, as get_tpm expects gene names as columns — confirm file layout).
gex_df = pd.read_csv("/add/path/here/GSE207526_110.EAC.and.10.Normal.for.GSEA.txt",sep="\t").iloc[1:,:].T

# Gene position table; get_tpm reads its gene_name, start and end columns.
gencode_mapping = pd.read_csv("/add/path/here/gencode_v41_positions.csv",index_col=0)

tpm = get_tpm(gencode_mapping, gex_df)

In [None]:
# Second bulk expression table (labelled "Carroll" further down), transposed after loading
# (presumably to samples x genes — confirm file layout).
gex_df2 = pd.read_csv("/add/path/here/bulk_preprocessed.csv",index_col=0).T

# Same gene position table as in the previous cell, re-read so this cell can run on its own.
gencode_mapping = pd.read_csv("/add/path/here/gencode_v41_positions.csv",index_col=0)

tpm2 = get_tpm(gencode_mapping, gex_df2)

In [None]:
signature_dir = pl.Path("/add/path/here/signatures_canceronly/")

# Gene-name prefixes removed from every signature.
excluded_prefixes = ("MT-","RPS","RPL")

# Maps signature name (file stem) -> list of gene names.
full_sigs = {}
# sorted(): iterdir() yields entries in arbitrary order, which would make the
# order of the signatures depend on the file system.
for s in sorted(signature_dir.iterdir()):
    sig = s.stem
    # The genes are stored in the column named "0".
    genes = pd.read_csv(s,index_col=0).set_index("0").index
    genes = genes[~genes.str.startswith(excluded_prefixes)]
    # A plain list instead of Index.ravel(), which is deprecated and returns
    # different types across pandas versions.
    full_sigs[sig] = genes.tolist()

In [None]:
from itertools import chain

# Stack the three cohorts and keep only the genes measured in all of them.
cohort_frames = {"TCGA": fpkm_eac, "Hoefnagel": tpm, "Carroll": tpm2}
full_tpm = pd.concat(list(cohort_frames.values())).dropna(axis=1)

# One study label per row, in the same order as the concatenation above.
full_tpm["Study"] = list(
    chain.from_iterable([name] * frame.shape[0] for name, frame in cohort_frames.items())
)

In [None]:
import gseapy as gp

# ssGSEA takes the expression matrix with the "Study" label removed, transposed.
expression = full_tpm.drop(columns="Study").T

ss = gp.ssgsea(
    data=expression,
    gene_sets=full_sigs.copy(),
    min_size=10,
    outdir=None,
    sample_norm_method='rank',  # 'custom' would use the raw values of `data` instead
    no_plot=True,
)

# NOTE(review): the cells below index state_score by sample and read columns
# such as "cNMF_3", so this assumes res2d is a signatures x samples table —
# confirm against the installed gseapy version.
state_score = ss.res2d.T

In [None]:
def _scaled_microenvironment(fractions):
    """Drop the tumor/epithelium columns and min-max scale each remaining column."""
    kept = fractions.drop(columns=["tumor","NonSquamous_Epithelium","Squamous_Epithelium"])
    lowest, highest = kept.min(), kept.max()
    return (kept - lowest) / (highest - lowest)


# Scaling is done per cohort. For TCGA it uses every sample in bp_eac, and the
# adenocarcinoma samples are selected afterwards.
rel_prop = _scaled_microenvironment(bp_eac).loc[eac_patients]
rel_prop_gse = _scaled_microenvironment(bp_eac_gse)
rel_prop_carroll = _scaled_microenvironment(bp_eac_carroll)

full_rel_prop = pd.concat([rel_prop, rel_prop_gse, rel_prop_carroll])

In [None]:
# Study label for every row of full_rel_prop. The label counts are taken from
# the three frames that were concatenated into full_rel_prop, so they cannot
# drift out of step with it (the expression matrices may hold a different
# number of samples than the fraction tables).
study = pd.DataFrame(
    {
        "Study": np.repeat(
            ["TCGA", "Hoefnagel", "Carroll"],
            [len(rel_prop), len(rel_prop_gse), len(rel_prop_carroll)],
        )
    },
    index=full_rel_prop.index,
)

In [None]:
# Ward-linkage clustermap of the scaled fractions; `ax` holds the object returned by
# sns.clustermap, whose row linkage is read in the next cell.
ax = sns.clustermap(data=full_rel_prop, cmap="vlag", method="ward", )

In [None]:
# Row (sample) linkage computed by the clustermap above.
lnkg = ax.dendrogram_row.linkage

In [None]:
# Import the submodule explicitly: a bare `import scipy` is not guaranteed to
# expose scipy.cluster, and the attribute access only worked because seaborn
# had already loaded it.
from scipy.cluster.hierarchy import fcluster

# Cut the sample dendrogram into at most six flat clusters (labels start at 1).
clusters = fcluster(lnkg, t=6, criterion="maxclust")

In [None]:
# Colour annotation for the heatmap rows: one column for the cluster, one for the study.
cluster_colors = {1: "red", 2: "blue", 3: "purple", 4: "yellow", 5: "green", 6: "pink"}
study_colors = {"TCGA": "dimgrey", "Hoefnagel": "darkgreen", "Carroll": "cornflowerblue"}

row_colors = pd.concat(
    [
        pd.DataFrame(clusters, index=full_rel_prop.index, columns=["Ecotype"]).replace(cluster_colors),
        study.replace(study_colors),
    ],
    axis=1,
)

In [None]:
# Final six-cluster heatmap with the cluster/study colour bars; row labels are hidden.
clustermap_options = dict(
    cmap="vlag",
    method="ward",
    row_colors=row_colors,
    figsize=(5,6),
    tree_kws={"linewidth": 2},
)
ax = sns.clustermap(data=full_rel_prop, **clustermap_options)
ax.ax_heatmap.yaxis.set_visible(False)
ax.figure.savefig("figures/ecotypes/ecotype_5eco_heatmap.svg", dpi=200, bbox_inches="tight")

In [None]:
# Number of samples per study colour (rows) and cluster colour (columns).
row_colors.groupby("Study").value_counts().unstack()

In [None]:
# Mean scaled fraction of every cell type within each cluster.
cluster_labels = pd.Series(clusters, index=full_rel_prop.index, name="Cluster")
df = pd.concat([full_rel_prop, cluster_labels], axis=1)
df.groupby(by="Cluster").mean()

In [None]:
# Translate the colour codes of row_colors back into readable labels.
color_to_ecotype = {
    "red": "Vasc. Endo.",
    "blue": "Immune/stromal desert",
    "purple": "Vasc. Endo.+Schwann",
    "yellow": "Vasc. Endo.+Fibroblast+Myeloid",
    "green": "B+Plasma",
    "pink": "T+NK+B",
}
color_to_study = {"dimgrey": "TCGA", "darkgreen": "Hoefnagel", "cornflowerblue": "Carroll"}

ecotypes = row_colors.replace(color_to_ecotype)
ecotypes.columns = ["Ecotypes","Study"]
ecotypes["Study"] = ecotypes["Study"].replace(color_to_study)

# Signature scores joined with the ecotype and study of each sample (aligned on sample ID).
df = pd.concat([state_score, ecotypes], axis=1)

In [None]:
# Box-plot palette: ecotype label -> colour (same colours as the heatmap row annotation).
palette = {
    "Vasc. Endo.": "red",
    "Immune/stromal desert": "blue",
    "Vasc. Endo.+Schwann": "purple",
    "Vasc. Endo.+Fibroblast+Myeloid": "yellow",
    "B+Plasma": "green",
    "T+NK+B": "pink",
}

In [None]:
# cNMF_3 score per ecotype, with a Mann-Whitney test of every ecotype against
# the immune/stromal desert group.
x = "Ecotypes"
y = "cNMF_3"

order = [
    "Immune/stromal desert",
    "Vasc. Endo.",
    "Vasc. Endo.+Schwann",
    "Vasc. Endo.+Fibroblast+Myeloid",
    "T+NK+B",
    "B+Plasma",
]

# Every comparison pairs the desert ecotype with one other ecotype.
reference = "Immune/stromal desert"
pairs = [
    (reference, other)
    for other in (
        "Vasc. Endo.+Schwann",
        "Vasc. Endo.+Fibroblast+Myeloid",
        "Vasc. Endo.",
        "T+NK+B",
        "B+Plasma",
    )
]

fig, ax = plt.subplots(1, 1, figsize=(5, 3))
sns.boxplot(data=df, x=x, y=y, ax=ax, palette=palette, order=order)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
pretty_ax(ax)
ax.set(xlabel="", ylabel="cNMF$_{3}$")

annotator = Annotator(ax, pairs, data=df, x=x, y=y, order=order)
annotator.configure(test='Mann-Whitney', text_format='simple', loc='inside')
annotator.apply_and_annotate()

fig.savefig("figures/ecotypes/cNMF_3_dist_5eco_boxplot.svg", dpi=200, bbox_inches="tight")

In [None]:
# cNMF_3 score per ecotype, split by study, with within-study Mann-Whitney
# tests against the immune/stromal desert group.
x = "Ecotypes"
y = "cNMF_3"
hue = "Study"

order = [
    "Immune/stromal desert",
    "Vasc. Endo.",
    "Vasc. Endo.+Schwann",
    "Vasc. Endo.+Fibroblast+Myeloid",
    "T+NK+B",
    "B+Plasma",
]

# Ecotypes compared with the desert group, per study.
# NOTE(review): TCGA has no "Vasc. Endo.+Schwann" comparison — presumably
# because no TCGA sample falls in that ecotype; confirm.
reference = "Immune/stromal desert"
compared_per_study = {
    "TCGA": ["Vasc. Endo.+Fibroblast+Myeloid", "Vasc. Endo.", "T+NK+B", "B+Plasma"],
    "Hoefnagel": ["Vasc. Endo.+Schwann", "Vasc. Endo.+Fibroblast+Myeloid", "Vasc. Endo.", "T+NK+B", "B+Plasma"],
    "Carroll": ["Vasc. Endo.+Schwann", "Vasc. Endo.+Fibroblast+Myeloid", "Vasc. Endo.", "T+NK+B", "B+Plasma"],
}
pairs = [
    ((reference, cohort), (other, cohort))
    for cohort, others in compared_per_study.items()
    for other in others
]

study_palette = {"TCGA": "dimgrey", "Hoefnagel": "darkgreen", "Carroll": "cornflowerblue"}

fig, ax = plt.subplots(1, 1, figsize=(5, 3))
sns.boxplot(data=df, x=x, y=y, ax=ax, order=order, hue=hue, palette=study_palette)
ax.legend(bbox_to_anchor=(1,1,0,0), frameon=False)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
pretty_ax(ax)
ax.set(xlabel="", ylabel="cNMF$_{3}$")

annotator = Annotator(ax, pairs, data=df, hue=hue, x=x, y=y, order=order)
annotator.configure(test='Mann-Whitney', text_format='simple', loc='inside')
annotator.apply_and_annotate()

# NOTE(review): this file name has a double underscore ("5eco__perstudy"),
# unlike the cNMF_4/cNMF_5 per-study figures — confirm it is intended.
fig.savefig("figures/ecotypes/cNMF_3_dist_5eco__perstudy_boxplot.svg", dpi=200, bbox_inches="tight")

In [None]:
# cNMF_4 score per ecotype, with a Mann-Whitney test of every ecotype against
# the immune/stromal desert group.
x = "Ecotypes"
y = "cNMF_4"

order = [
    "Immune/stromal desert",
    "Vasc. Endo.",
    "Vasc. Endo.+Schwann",
    "Vasc. Endo.+Fibroblast+Myeloid",
    "T+NK+B",
    "B+Plasma",
]

# Every comparison pairs the desert ecotype with one other ecotype.
reference = "Immune/stromal desert"
pairs = [
    (reference, other)
    for other in (
        "Vasc. Endo.+Schwann",
        "Vasc. Endo.+Fibroblast+Myeloid",
        "Vasc. Endo.",
        "T+NK+B",
        "B+Plasma",
    )
]

fig, ax = plt.subplots(1, 1, figsize=(5, 3))
sns.boxplot(data=df, x=x, y=y, ax=ax, palette=palette, order=order)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
pretty_ax(ax)
ax.set(xlabel="", ylabel="cNMF$_{4}$")

annotator = Annotator(ax, pairs, data=df, x=x, y=y, order=order)
annotator.configure(test='Mann-Whitney', text_format='simple', loc='inside')
annotator.apply_and_annotate()

fig.savefig("figures/ecotypes/cNMF_4_dist_5eco_boxplot.svg", dpi=200, bbox_inches="tight")

In [None]:
# cNMF_4 score per ecotype, split by study, with within-study Mann-Whitney
# tests against the immune/stromal desert group.
x = "Ecotypes"
y = "cNMF_4"
hue = "Study"

order = [
    "Immune/stromal desert",
    "Vasc. Endo.",
    "Vasc. Endo.+Schwann",
    "Vasc. Endo.+Fibroblast+Myeloid",
    "T+NK+B",
    "B+Plasma",
]

# Ecotypes compared with the desert group, per study.
# NOTE(review): TCGA has no "Vasc. Endo.+Schwann" comparison — presumably
# because no TCGA sample falls in that ecotype; confirm.
reference = "Immune/stromal desert"
compared_per_study = {
    "TCGA": ["Vasc. Endo.+Fibroblast+Myeloid", "Vasc. Endo.", "T+NK+B", "B+Plasma"],
    "Hoefnagel": ["Vasc. Endo.+Schwann", "Vasc. Endo.+Fibroblast+Myeloid", "Vasc. Endo.", "T+NK+B", "B+Plasma"],
    "Carroll": ["Vasc. Endo.+Schwann", "Vasc. Endo.+Fibroblast+Myeloid", "Vasc. Endo.", "T+NK+B", "B+Plasma"],
}
pairs = [
    ((reference, cohort), (other, cohort))
    for cohort, others in compared_per_study.items()
    for other in others
]

study_palette = {"TCGA": "dimgrey", "Hoefnagel": "darkgreen", "Carroll": "cornflowerblue"}

fig, ax = plt.subplots(1, 1, figsize=(5, 3))
sns.boxplot(data=df, x=x, y=y, ax=ax, order=order, hue=hue, palette=study_palette)
ax.legend(bbox_to_anchor=(1,1,0,0), frameon=False)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
pretty_ax(ax)
ax.set(xlabel="", ylabel="cNMF$_{4}$")

annotator = Annotator(ax, pairs, data=df, hue=hue, x=x, y=y, order=order)
annotator.configure(test='Mann-Whitney', text_format='simple', loc='inside')
annotator.apply_and_annotate()

fig.savefig("figures/ecotypes/cNMF_4_dist_5eco_perstudy_boxplot.svg", dpi=200, bbox_inches="tight")

In [None]:
# cNMF_5 score per ecotype, with a Mann-Whitney test of every ecotype against
# the immune/stromal desert group.
x = "Ecotypes"
y = "cNMF_5"

order = [
    "Immune/stromal desert",
    "Vasc. Endo.",
    "Vasc. Endo.+Schwann",
    "Vasc. Endo.+Fibroblast+Myeloid",
    "T+NK+B",
    "B+Plasma",
]

# Every comparison pairs the desert ecotype with one other ecotype.
reference = "Immune/stromal desert"
pairs = [
    (reference, other)
    for other in (
        "Vasc. Endo.+Schwann",
        "Vasc. Endo.+Fibroblast+Myeloid",
        "Vasc. Endo.",
        "T+NK+B",
        "B+Plasma",
    )
]

fig, ax = plt.subplots(1, 1, figsize=(5, 3))
sns.boxplot(data=df, x=x, y=y, ax=ax, palette=palette, order=order)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
pretty_ax(ax)
ax.set(xlabel="", ylabel="cNMF$_{5}$")

annotator = Annotator(ax, pairs, data=df, x=x, y=y, order=order)
annotator.configure(test='Mann-Whitney', text_format='simple', loc='inside')
annotator.apply_and_annotate()

fig.savefig("figures/ecotypes/cNMF_5_dist_5eco_boxplot.svg", dpi=200, bbox_inches="tight")

In [None]:
# cNMF_5 score per ecotype, split by study, with within-study Mann-Whitney
# tests against the immune/stromal desert group.
x = "Ecotypes"
y = "cNMF_5"
hue = "Study"

order = [
    "Immune/stromal desert",
    "Vasc. Endo.",
    "Vasc. Endo.+Schwann",
    "Vasc. Endo.+Fibroblast+Myeloid",
    "T+NK+B",
    "B+Plasma",
]

# Ecotypes compared with the desert group, per study.
# NOTE(review): TCGA has no "Vasc. Endo.+Schwann" comparison — presumably
# because no TCGA sample falls in that ecotype; confirm.
reference = "Immune/stromal desert"
compared_per_study = {
    "TCGA": ["Vasc. Endo.+Fibroblast+Myeloid", "Vasc. Endo.", "T+NK+B", "B+Plasma"],
    "Hoefnagel": ["Vasc. Endo.+Schwann", "Vasc. Endo.+Fibroblast+Myeloid", "Vasc. Endo.", "T+NK+B", "B+Plasma"],
    "Carroll": ["Vasc. Endo.+Schwann", "Vasc. Endo.+Fibroblast+Myeloid", "Vasc. Endo.", "T+NK+B", "B+Plasma"],
}
pairs = [
    ((reference, cohort), (other, cohort))
    for cohort, others in compared_per_study.items()
    for other in others
]

study_palette = {"TCGA": "dimgrey", "Hoefnagel": "darkgreen", "Carroll": "cornflowerblue"}

fig, ax = plt.subplots(1, 1, figsize=(5, 3))
sns.boxplot(data=df, x=x, y=y, ax=ax, order=order, hue=hue, palette=study_palette)
ax.legend(bbox_to_anchor=(1,1,0,0), frameon=False)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
pretty_ax(ax)
ax.set(xlabel="", ylabel="cNMF$_{5}$")

annotator = Annotator(ax, pairs, data=df, hue=hue, x=x, y=y, order=order)
annotator.configure(test='Mann-Whitney', text_format='simple', loc='inside')
annotator.apply_and_annotate()

fig.savefig("figures/ecotypes/cNMF_5_dist_5eco_perstudy_boxplot.svg", dpi=200, bbox_inches="tight")

# With n_cluster=2

In [None]:
# Rebuild the scaled fraction matrix for the two-cluster analysis: per cohort,
# drop the tumor/epithelium columns and min-max scale every remaining column.
dropped_columns = ["tumor","NonSquamous_Epithelium","Squamous_Epithelium"]

scaled_cohorts = []
for fractions in (bp_eac, bp_eac_gse, bp_eac_carroll):
    kept = fractions.drop(columns=dropped_columns)
    value_range = kept.max() - kept.min()
    scaled_cohorts.append(kept.sub(kept.min()).div(value_range))

rel_prop, rel_prop_gse, rel_prop_carroll = scaled_cohorts
# The TCGA table is scaled on all of its samples and subset to the selected patients afterwards.
rel_prop = rel_prop.loc[eac_patients]

full_rel_prop = pd.concat([rel_prop, rel_prop_gse, rel_prop_carroll])

In [None]:
# Study label for every row of full_rel_prop. The label counts are taken from
# the three frames that were concatenated into full_rel_prop, so they cannot
# drift out of step with it (the expression matrices may hold a different
# number of samples than the fraction tables).
cohort_sizes = [len(rel_prop), len(rel_prop_gse), len(rel_prop_carroll)]
study = pd.DataFrame(
    {"Study": np.repeat(["TCGA", "Hoefnagel", "Carroll"], cohort_sizes)},
    index=full_rel_prop.index,
)

In [None]:
# Ward-linkage clustermap of the scaled fractions; `ax` holds the object returned by
# sns.clustermap, whose row linkage is read in the next cell.
ax = sns.clustermap(data=full_rel_prop, cmap="vlag", method="ward", )

In [None]:
# Row (sample) linkage computed by the clustermap above.
lnkg = ax.dendrogram_row.linkage

In [None]:
# Import the submodule explicitly: a bare `import scipy` is not guaranteed to
# expose scipy.cluster, and the attribute access only worked because seaborn
# had already loaded it.
from scipy.cluster.hierarchy import fcluster

# Cut the sample dendrogram into at most two flat clusters (labels start at 1).
clusters = fcluster(lnkg, t=2, criterion="maxclust")

In [None]:
# Colour annotation for the heatmap rows: one column for the cluster, one for the study.
# (The former `row_colors.name = "Ecotype"` was dropped: on a DataFrame it only
# set a stray attribute that the concat below discarded; the column is already
# named "Ecotype".)
row_colors = pd.DataFrame(clusters, index=full_rel_prop.index, columns=["Ecotype"])
row_colors = row_colors.replace({1: "brown", 2: "pink"})
row_colors = pd.concat([row_colors, study.replace({"TCGA": "dimgrey", "Hoefnagel": "darkgreen", "Carroll": "cornflowerblue"})],axis=1)

In [None]:
# Final two-cluster heatmap with the cluster/study colour bars; row labels are
# hidden and the column labels are rotated.
ax = sns.clustermap(
    data=full_rel_prop,
    cmap="vlag",
    method="ward",
    row_colors=row_colors,
    figsize=(5,6),
    tree_kws={"linewidth": 2},
)
heatmap = ax.ax_heatmap
heatmap.yaxis.set_visible(False)
heatmap.set_xticks(heatmap.get_xticks(), heatmap.get_xticklabels(), rotation=45, ha='right')
ax.figure.savefig("figures/ecotypes/ecotype_2eco_heatmap.svg", dpi=200, bbox_inches="tight")

In [None]:
# Translate the colour codes of row_colors back into readable labels.
color_to_ecotype = {
    "pink": "Immune/stromal activated",
    "brown": "Immune/stromal desert",
}
color_to_study = {"dimgrey": "TCGA", "darkgreen": "Hoefnagel", "cornflowerblue": "Carroll"}

ecotypes = row_colors.replace(color_to_ecotype)
ecotypes.columns = ["Ecotypes","Study"]
ecotypes["Study"] = ecotypes["Study"].replace(color_to_study)

# Signature scores joined with the ecotype and study of each sample (aligned on sample ID).
df = pd.concat([state_score, ecotypes], axis=1)

In [None]:
# One panel per cNMF program: desert vs. activated ecotype, Mann-Whitney test.
x = "Ecotypes"
palette = {
    "Immune/stromal desert": "brown",
    "Immune/stromal activated": "pink",
}
order = ["Immune/stromal desert", "Immune/stromal activated"]
pairs = [("Immune/stromal desert", "Immune/stromal activated"),]
tick_labels = ["Immune/stromal\ndesert", "Immune/stromal\nactivated"]

fig, ax = plt.subplots(1, 5, figsize=(10, 3))
flatax = ax.flatten()

for i, panel in enumerate(flatax):
    y = f"cNMF_{i+1}"

    sns.boxplot(data=df, x=x, y=y, ax=panel, palette=palette, order=order)
    panel.set_xticks(panel.get_xticks(), tick_labels, rotation=60, ha="right")
    pretty_ax(panel)
    panel.set_xlabel("")

    annotator = Annotator(panel, pairs, data=df, x=x, y=y, order=order)
    annotator.configure(test='Mann-Whitney', text_format='simple', loc='inside', show_test_name=False,)
    annotator.apply_and_annotate()

    panel.set_ylabel(f"cNMF$_{i+1}$")

fig.tight_layout()

In [None]:
# Long-format table: one row per (program, sample) with the score and a
# combined "<study> <ecotype>" group label.
programs = ["cNMF_1",'cNMF_2',"cNMF_3","cNMF_4","cNMF_5"]

sample_to_group = (df["Study"] + " " + df["Ecotypes"]).to_dict()

unstacked = df[programs].unstack().reset_index()
unstacked["Ecotype"] = unstacked["level_1"].replace(sample_to_group)
unstacked.columns = ["Program","Idx","Score","Ecotype"]
unstacked = unstacked.set_index("Idx")

palette = {
    "TCGA Immune/stromal activated": "dimgrey",
    "TCGA Immune/stromal desert": "silver",
    "Hoefnagel Immune/stromal activated": "darkgreen",
    "Hoefnagel Immune/stromal desert": "mediumaquamarine",
    "Carroll Immune/stromal activated": "cornflowerblue",
    "Carroll Immune/stromal desert": "lavender",
}

# Activated/desert groups are adjacent for each study.
hue_order = [
    "TCGA Immune/stromal activated", "TCGA Immune/stromal desert",
    "Hoefnagel Immune/stromal activated", "Hoefnagel Immune/stromal desert",
    "Carroll Immune/stromal activated", "Carroll Immune/stromal desert",
]

# Within every program, compare activated vs. desert separately for each study.
pairs = [
    ((prg, hue_order[k]), (prg, hue_order[k + 1]))
    for prg in programs
    for k in (0, 2, 4)
]

fig, ax = plt.subplots(1, 1, figsize=(5, 4))
sns.boxplot(data=unstacked, x="Program", y="Score", hue="Ecotype", palette=palette,
            hue_order=hue_order)

annotator = Annotator(ax, pairs, data=unstacked, x="Program", y="Score", hue="Ecotype",
                      palette=palette, hue_order=hue_order)
annotator.configure(test='Mann-Whitney', text_format='simple', show_test_name=False, loc='inside')
annotator.apply_and_annotate()

plt.legend(frameon=False, bbox_to_anchor=(1,1,0,0), title="Study/Ecotype")
pretty_ax(ax)
ax.set_xticks(ax.get_xticks(), ["cNMF$_{1}$","cNMF$_{2}$","cNMF$_{3}$","cNMF$_{4}$","cNMF$_{5}$"])
fig.savefig("figures/ecotypes/dist_score_ecotype_2eco_boxplot.svg", dpi=200, bbox_inches="tight")

In [None]:
# One panel per cNMF program: desert vs. activated ecotype within each study.
x = "Ecotypes"
hue = "Study"

# Not used by the plots in this cell, which colour by study.
palette = {
    "Immune/stromal desert": "brown",
    "Immune/stromal activated": "pink",
}
order = ["Immune/stromal desert", "Immune/stromal activated"]

pairs = [
    ((order[0], cohort), (order[1], cohort))
    for cohort in ("TCGA", "Hoefnagel", "Carroll")
]

study_palette = {"TCGA": "grey", "Hoefnagel": "whitesmoke", "Carroll": "cornflowerblue"}
tick_labels = ["Immune/stromal\ndesert", "Immune/stromal\nactivated"]

fig, ax = plt.subplots(2, 3, figsize=(8, 5))
flatax = ax.flatten()

# Only the fifth (last used) panel carries the legend.
showleg = [k == 5 for k in range(1, 6)]
for i, y in enumerate([f"cNMF_{k}" for k in range(1, 6)]):
    panel = flatax[i]
    sns.boxplot(data=df, x=x, y=y, ax=panel, order=order,
                hue=hue, palette=study_palette)
    if showleg[i]:
        panel.legend(bbox_to_anchor=(1,1,0,0), frameon=False)
    else:
        panel.legend([], [], frameon=False)
    panel.set_xticks(panel.get_xticks(), tick_labels, rotation=35, ha="right")
    pretty_ax(panel)
    panel.set_xlabel("")

    annotator = Annotator(panel, pairs, data=df, hue=hue, x=x, y=y, order=order)
    annotator.configure(test='Mann-Whitney', text_format='simple', show_test_name=False, loc='inside')
    annotator.apply_and_annotate()

# The sixth panel of the 2x3 grid is unused.
flatax[-1].axis("off")

fig.tight_layout()
fig.savefig("figures/ecotype_dist_immune_act_perstudy.svg", dpi=200)