In [None]:
import pathlib as pl

import dill
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
import os
from tqdm.notebook import tqdm

In [None]:
import scanpy as sc

In [None]:
# Load the pickled SCENIC+ object of every sample directory under work_dir,
# keyed by the directory stem.
# NOTE(review): work_dir is not assigned in any cell visible here; it is used
# like a pathlib.Path (iterdir, "/") and must be defined before this cell runs.
all_scplus = {}
# sorted() gives a deterministic sample order (iterdir order is arbitrary) and
# lets tqdm show a total.
for f in tqdm(sorted(work_dir.iterdir())):
    sample_name = f.stem
    if sample_name=="CCG1153_4411":
        continue
    if not f.is_dir():
        # Stray files next to the sample directories cannot contain a pickle.
        continue
    print(sample_name)
    # SECURITY: dill.load runs arbitrary code embedded in the pickle; only load
    # files produced by a trusted pipeline.
    # The context manager closes the handle even if unpickling fails.
    with open(f / 'scplus_obj.pkl', 'rb') as infile:
        all_scplus[sample_name] = dill.load(infile)

In [None]:
from scenicplus.utils import format_egrns

# Run format_egrns on every loaded object, reading the 'eRegulons_importance'
# and 'TF2G_adj' keys and storing the result under 'eRegulon_metadata'.
for sample_name, scplus_obj in tqdm(all_scplus.items()):
    format_egrns(
        scplus_obj,
        eregulons_key='eRegulons_importance',
        TF2G_key='TF2G_adj',
        key_added='eRegulon_metadata',
    )

In [None]:
# Show the BNC2 rows of the eRegulon metadata.
# sample_name is whatever the previous loop left behind, i.e. the last sample iterated.
eregulon_metadata = all_scplus[sample_name].uns['eRegulon_metadata']
eregulon_metadata[eregulon_metadata.TF == "BNC2"]

In [None]:
from scenicplus.eregulon_enrichment import *

# Derive the 'eRegulon_signatures' entry from 'eRegulon_metadata' for every sample.
# NOTE(review): get_eRegulons_as_signatures arrives through the wildcard import
# above; an explicit import would make the dependency visible.
for sample_name, scplus_obj in tqdm(all_scplus.items()):
    get_eRegulons_as_signatures(
        scplus_obj,
        eRegulon_metadata_key='eRegulon_metadata',
        key_added='eRegulon_signatures',
    )

In [None]:
from scenicplus.cistromes import *
import time

# Region-based scoring: build a region ranking per sample and score the
# eRegulon signatures against it. Prints the sample name and elapsed minutes.
# NOTE(review): make_rankings / score_eRegulons come from wildcard imports in
# this notebook; confirm the providing module and import them explicitly.
for sample_name, scplus_obj in tqdm(all_scplus.items()):
    start_time = time.time()
    region_ranking = make_rankings(scplus_obj, target='region')
    # Score region regulons
    score_eRegulons(
        scplus_obj,
        ranking=region_ranking,
        eRegulon_signatures_key='eRegulon_signatures',
        key_added='eRegulon_AUC',
        enrichment_type='region',
        auc_threshold=0.05,
        normalize=False,
        n_cpu=1,
    )
    elapsed_min = (time.time() - start_time) / 60
    print(sample_name, elapsed_min)

In [None]:
## Score transcriptome layer
# Gene based ranking
from scenicplus.cistromes import *
import time

# Same procedure as the region-based scoring cell, with target='gene' and
# enrichment_type='gene'. Results are written under the same 'eRegulon_AUC' key.
for sample_name, scplus_obj in tqdm(all_scplus.items()):
    start_time = time.time()
    gene_ranking = make_rankings(scplus_obj, target='gene')
    # Score gene regulons (the ranking is passed positionally, as before)
    score_eRegulons(
        scplus_obj,
        gene_ranking,
        eRegulon_signatures_key='eRegulon_signatures',
        key_added='eRegulon_AUC',
        enrichment_type='gene',
        auc_threshold=0.05,
        normalize=False,
        n_cpu=1,
    )
    elapsed_min = (time.time() - start_time) / 60
    print(sample_name, elapsed_min)

In [None]:
# Remove cells labelled "cNMF_Outlier" or "Other" in ACC_highlevel_wcancer from
# every sample, then restrict both AUC tables to the remaining cells.
# NOTE: subset() is called without using a return value, so this cell modifies
# the loaded objects in place.
for sample_name, scplus_obj in tqdm(all_scplus.items()):
    highlevel_labels = scplus_obj.metadata_cell["ACC_highlevel_wcancer"]
    is_excluded = highlevel_labels.isin(["cNMF_Outlier","Other"])
    subset_cells = scplus_obj.metadata_cell[~is_excluded].index

    scplus_obj.subset(cells=subset_cells)

    # Fetch the AUC container only after subsetting, mirroring the original lookups.
    auc_tables = scplus_obj.uns["eRegulon_AUC"]
    for signature_key in ("Gene_based", "Region_based"):
        auc_tables[signature_key] = auc_tables[signature_key].loc[subset_cells]

In [None]:
# Generate pseudobulks
import time

# For every sample: collapse ACC_refined_wcancer labels with fewer than 10
# cells into "Other", store the result as metadata column "refined_wcancer_red",
# then generate pseudobulks for both AUC signature sets with identical settings.
for sample_name in tqdm(all_scplus):
    start_time = time.time()
    scplus_obj = all_scplus[sample_name]
    labels = scplus_obj.metadata_cell.ACC_refined_wcancer.copy()
    # Count once; the previous version recomputed value_counts() inside the
    # lambda for every call.
    label_counts = labels.value_counts()
    labels = labels.apply(lambda x: x if label_counts.loc[x]>=10 else "Other")
    scplus_obj.metadata_cell["refined_wcancer_red"] = labels
    for signature_key in ('Gene_based', 'Region_based'):
        generate_pseudobulks(scplus_obj,
                             variable = "refined_wcancer_red",
                             auc_key = 'eRegulon_AUC',
                             signature_key = signature_key,
                             nr_cells = 5,
                             nr_pseudobulks = 100,
                             seed=555)
    tm = time.time()-start_time
    # Include the sample name so timings can be attributed, as in the scoring cells.
    print(sample_name,tm/60)

In [None]:
# Correlation between TF and eRegulons
import time

# (signature_key, out_key) pairs evaluated for every sample, in this order.
correlation_runs = (
    ('Gene_based', 'ACC_refined_wcancer_eGRN_gene_based'),
    ('Region_based', 'ACC_refined_wcancer_eGRN_region_based'),
)

# Prints the elapsed minutes per sample.
for sample_name, scplus_obj in tqdm(all_scplus.items()):
    start_time = time.time()
    for signature_key, out_key in correlation_runs:
        TF_cistrome_correlation(
            scplus_obj,
            variable='refined_wcancer_red',
            auc_key='eRegulon_AUC',
            signature_key=signature_key,
            out_key=out_key,
        )
    elapsed_min = (time.time() - start_time) / 60
    print(elapsed_min)

In [None]:
# Correlation between region based regulons and gene based regulons
def select_regulons_filter(scplus_obj, corr_lim= 0.2):
    """Select eRegulons whose gene-based and region-based AUC are correlated.

    Column names of both AUC tables are expected to look like
    ``<regulon>_(<n>g)`` / ``<regulon>_(<n>r)``; the part before ``_(`` is the
    regulon name used to pair the two tables.

    Selection steps:
      1. correlate gene-based and region-based AUC per regulon name and keep
         those with ``abs(correlation) > corr_lim``;
      2. keep only names containing ``'+_+'`` or ``'+_-'``;
      3. keep an ``extended`` regulon only if its non-extended counterpart
         (name with ``'extended_'`` removed) did not pass steps 1-2;
      4. gene-based columns additionally need more than 10 genes (the number
         before ``g)``).

    Parameters
    ----------
    scplus_obj
        Object with a dict-like ``uns`` holding
        ``uns['eRegulon_AUC']['Gene_based']`` and ``['Region_based']`` DataFrames.
    corr_lim : float
        Exclusive lower bound on the absolute correlation.

    Returns
    -------
    The same ``scplus_obj``, with ``uns['selected_eRegulons']`` set to a dict
    holding the selected ``'Gene_based'`` and ``'Region_based'`` column names
    (in the column order of the respective AUC table).
    """
    gene_auc = scplus_obj.uns['eRegulon_AUC']['Gene_based']
    region_auc = scplus_obj.uns['eRegulon_AUC']['Region_based']

    def base_name(column):
        # Strip the "_(<n>g)" / "_(<n>r)" size suffix.
        return column.split('_(')[0]

    # Rename on copies so the stored AUC tables keep their full column names.
    df1 = gene_auc.copy()
    df2 = region_auc.copy()
    df1.columns = [base_name(x) for x in df1.columns]
    df2.columns = [base_name(x) for x in df2.columns]
    correlations = df1.corrwith(df2, axis = 0)
    correlations = correlations[correlations.abs() > corr_lim]
    # Keep only activators
    candidates = [x for x in correlations.index if '+_+' in x or '+_-' in x]
    # Keep extended if not direct
    direct = {x for x in candidates if 'extended' not in x}
    keep_extended = {x for x in candidates
                     if 'extended' in x and x.replace('extended_', '') not in direct}
    keep = direct | keep_extended  # set: only used for membership tests below
    # Keep regulons with more than 10 genes
    keep_gene = [x for x in gene_auc.columns
                 if base_name(x) in keep
                 and int(x.split('_(')[1].replace('g)', '')) > 10]
    # NOTE(review): the region-based selection is not restricted by the
    # gene-count filter above; this matches the previous behaviour.
    keep_region = [x for x in region_auc.columns if base_name(x) in keep]
    scplus_obj.uns['selected_eRegulons'] = {
        'Gene_based': keep_gene,
        'Region_based': keep_region,
    }
    return scplus_obj

In [None]:
# Number of entries under uns["Cistromes"]["Unfiltered"] for the sample left
# in sample_name by the last loop that ran.
unfiltered_cistromes = all_scplus[sample_name].uns["Cistromes"]["Unfiltered"]
len(unfiltered_cistromes.keys())

In [None]:
# Run select_regulons_filter (default corr_lim) on every sample and store the
# returned object back under the same key.
for sample_name, scplus_obj in tqdm(all_scplus.items()):
    all_scplus[sample_name] = select_regulons_filter(scplus_obj)

In [None]:
from scenicplus.plotting.correlation_plot import *

# Correlation heatmap of the selected gene-based eRegulons for a single sample:
# the one left in sample_name by the preceding loop.
scplus_obj = all_scplus[sample_name]
selected_gene_regulons = scplus_obj.uns['selected_eRegulons']['Gene_based']
correlation_heatmap(
    scplus_obj,
    auc_key='eRegulon_AUC',
    signature_keys=['Gene_based'],
    selected_regulons=selected_gene_regulons,
    fcluster_threshold=0.1,
    fontsize=10,
)

In [None]:
from scenicplus.RSS import *

# Regulon specificity scores per sample over the 'refined_wcancer_red' groups,
# restricted to the selected gene-based eRegulons. The ranking cells read the
# result from uns["RSS"]['refined_wcancer_red_gene_based'].
for sample_name, scplus_obj in tqdm(all_scplus.items()):
    selected_gene_regulons = scplus_obj.uns['selected_eRegulons']['Gene_based']
    regulon_specificity_scores(
        scplus_obj,
        'refined_wcancer_red',
        signature_keys=['Gene_based'],
        selected_regulons=selected_gene_regulons,
        out_key_suffix='_gene_based',
        scale=False,
    )

In [None]:
# For each malignant state, collect every sample's 10 highest-RSS regulons
# (indexed by TF name) into one table: rows = TFs, columns = samples.
rank_df = {}
for state in ["cNMF_1","cNMF_3","cNMF_4","Carcinoma_undefined"]:
    per_sample = []
    for sample_name in tqdm(all_scplus):
        df = all_scplus[sample_name].uns["RSS"]['refined_wcancer_red_gene_based']
        if state in df.index:
            print(sample_name)
            seldf = df.loc[state,:].sort_values(ascending=False).head(10)
            seldf.name = sample_name
            # Reduce regulon names to the TF (text before the first "_").
            seldf.index = seldf.index.str.split("_").str[0]
            # Several regulons of one TF can be in the top 10; average them so
            # the index is unique before the column-wise concat.
            seldf = seldf.groupby(level=0).mean()
            per_sample.append(seldf)
    # pd.concat raises on an empty list; a state missing from every sample
    # yields an empty table instead of aborting the cell.
    rank_df[state] = pd.concat(per_sample,axis=1) if per_sample else pd.DataFrame()

In [None]:
# Per state, keep the TFs that have a (non-NaN) top-10 entry in at least two samples.
seltfs = {
    state: rank_df[state][rank_df[state].notna().sum(axis=1) >= 2]
    for state in ["cNMF_1","cNMF_3","cNMF_4","Carcinoma_undefined"]
}

In [None]:
# Display the recurrent-TF table of the cNMF_4 state.
seltfs["cNMF_4"]

In [None]:
# Sorted, de-duplicated TF names across all states in seltfs.
all_seltfs = np.unique(np.hstack([seltfs[state].index for state in seltfs]))

In [None]:
# Display the pooled TF names computed in the previous cell.
all_seltfs

In [None]:
# Same ranking as for the malignant states, for the microenvironment cell
# types: per cell type, every sample's 10 highest-RSS regulons indexed by TF.
rank_df_TME = {}
for ct in ["Hepatic EC", "Kupffer cells", "TAM2", "TAM1", "Quiescent EC", "Inflammatory CAF", "HGF-CAF", "Fibroblast", 
           "DC", "B", "TCD4", "TCD8", "Angiogenic EC","NK","Treg","Skeletal muscle", "Smooth muscle"]:
    print(ct)
    per_sample = []
    for sample_name in tqdm(all_scplus):
        df = all_scplus[sample_name].uns["RSS"]['refined_wcancer_red_gene_based']
        if ct in df.index:
            print(sample_name)
            seldf = df.loc[ct,:].sort_values(ascending=False).head(10)
            seldf.name = sample_name
            seldf.index = seldf.index.str.split("_").str[0]
            # Several regulons of one TF can be in the top 10; average them so
            # the index is unique. Duplicate labels break the column-wise
            # concat, and the malignant-state ranking cell does the same.
            seldf = seldf.groupby(level=0).mean()
            per_sample.append(seldf)
    # A cell type found in no sample gets an empty table (len() == 0), which
    # the downstream length check skips.
    rank_df_TME[ct] = pd.concat(per_sample,axis=1) if per_sample else pd.DataFrame()

In [None]:
# Per cell type with a non-empty ranking, keep TFs present in at least two samples.
seltfs_TME = {}
for state, ranking in rank_df_TME.items():
    if len(ranking) == 0:
        continue
    seltfs_TME[state] = ranking[ranking.notna().sum(axis=1) >= 2]

In [None]:
# Sorted, de-duplicated TF names across all cell types in seltfs_TME.
all_seltfs_TME = np.unique(np.hstack([seltfs_TME[state].index for state in seltfs_TME]))

# Plotting SCENIC+ results

## cNMF results

In [None]:
# Curated TFs shown in the cNMF heatmaps. This rebinds all_seltfs, replacing
# the array computed earlier from the RSS rankings.
all_seltfs = ['KLF5', 'ELF3', 'SMAD3', 'TCF7L2',  'HMGA2',"BNC2"]
# tf_order is the same list object; it sets the column order of the heatmaps.
tf_order = all_seltfs

In [None]:
# Row order of the cNMF heatmaps. The entries are looked up with .loc in the
# tables grouped by "ACC_highlevel_wcancer", so each must be a label of that column.
celltype_order = ["Endothelial","Epithelial","Fibroblast","Stromal_Muscle","Lymphoid",
                  "Myeloid","Nerve_adrenal","cNMF_3","cNMF_1","cNMF_4","Carcinoma_undefined"]

In [None]:
# Mean z-scored expression of the selected TFs per ACC_highlevel_wcancer group.
# Per sample: normalise counts to 10,000 per cell and log1p-transform. The
# z-score is then taken per TF over the cells of all samples pooled together.
label_column = "ACC_highlevel_wcancer"
per_sample_frames = []
for sample_name in all_scplus:
    scplus_obj = all_scplus[sample_name]
    gex_adata = sc.AnnData(scplus_obj.to_df("EXP").copy())
    sc.pp.normalize_total(adata=gex_adata, target_sum=10000)
    sc.pp.log1p(gex_adata)
    gex_df = pd.DataFrame(
        gex_adata[:, all_seltfs].X.copy(),
        index=scplus_obj.cell_names,
        columns=all_seltfs,
    )
    per_sample_frames.append(
        pd.concat([gex_df, scplus_obj.metadata_cell[label_column]], axis=1)
    )
pooled = pd.concat(per_sample_frames)

# Despite its name, full_auc holds expression values in this cell.
full_auc = pooled.drop(label_column, axis=1).copy()
full_auc = (full_auc - full_auc.mean()) / full_auc.std()

all_plot_dfs = (
    pd.concat([full_auc, pooled[label_column]], axis=1)
    .groupby(by=label_column)
    .mean()
)

In [None]:
# Heatmap of the current all_plot_dfs table (rows: celltype_order, columns:
# tf_order), saved as SVG.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))
heatmap_values = all_plot_dfs.loc[celltype_order, tf_order]
sns.heatmap(
    data=heatmap_values,
    cmap="vlag",
    center=0,
    ax=ax,
    cbar_kws={"label": "TF Z-score"},
    linewidths=1,
    linecolor="grey",
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax.set_ylabel("")
fig.savefig("figures/malignant/cNMF_scenic_TF_zscore.svg", dpi=200, bbox_inches="tight")

In [None]:
# Mean z-scored TF expression per ACC_highlevel_wcancer group, using only cells
# whose ACC_highlevel_celltype is "Carcinoma". Normalisation and log1p are
# applied per sample on all cells before the carcinoma cells are selected.
label_column = "ACC_highlevel_wcancer"
per_sample_frames = []
for sample_name in all_scplus:
    scplus_obj = all_scplus[sample_name]
    gex_adata = sc.AnnData(
        scplus_obj.to_df("EXP").copy(),
        obs=scplus_obj.metadata_cell[["ACC_highlevel_celltype"]],
    )
    sc.pp.normalize_total(adata=gex_adata, target_sum=10000)
    sc.pp.log1p(gex_adata)
    is_carcinoma = gex_adata.obs["ACC_highlevel_celltype"] == "Carcinoma"
    gex_df = pd.DataFrame(
        gex_adata[is_carcinoma, all_seltfs].X.copy(),
        index=gex_adata.obs[is_carcinoma].index,
        columns=all_seltfs,
    )
    # The label column covers all cells; dropna() removes the non-carcinoma
    # rows, which have no expression values after the outer concat.
    plot_df = pd.concat([gex_df, scplus_obj.metadata_cell[label_column]], axis=1).dropna()
    per_sample_frames.append(plot_df)
pooled = pd.concat(per_sample_frames)

# Despite its name, full_auc holds expression values in this cell.
full_auc = pooled.drop(label_column, axis=1).copy()
full_auc = (full_auc - full_auc.mean()) / full_auc.std()

all_plot_dfs = (
    pd.concat([full_auc, pooled[label_column]], axis=1)
    .groupby(by=label_column)
    .mean()
)

In [None]:
# Heatmap of the current all_plot_dfs table restricted to the four malignant
# states, saved as SVG.
malignant_states = ["cNMF_3","cNMF_1","cNMF_4","Carcinoma_undefined"]
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 3))
heatmap_values = all_plot_dfs.loc[malignant_states, tf_order]
sns.heatmap(
    data=heatmap_values,
    cmap="vlag",
    center=0,
    ax=ax,
    cbar_kws={"label": "TF Z-score"},
    linewidths=1,
    linecolor="grey",
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax.set_yticklabels(ax.get_yticklabels(), rotation=0, ha="right")
ax.set_ylabel("")
fig.savefig("figures/malignant/cNMF_scenic_TF_zscore_malonly.svg", dpi=200, bbox_inches="tight")

In [None]:
# Mean z-scored gene-based eRegulon AUC per ACC_highlevel_wcancer group,
# one column per TF in all_seltfs, pooled over all samples.
per_sample_auc = []
for sample_name in all_scplus:
    scplus_obj = all_scplus[sample_name]
    # Look the table up once per sample instead of copying it for every TF.
    sample_auc = scplus_obj.uns["eRegulon_AUC"]["Gene_based"]
    auc_alltfs = []
    for tf in all_seltfs:
        # Anchor the match on "<TF>_": a plain substring/regex match would also
        # pick up eRegulons of other TFs whose name merely contains this one.
        auc_df = sample_auc.loc[:, sample_auc.columns.str.startswith(tf + "_")].copy()
        if auc_df.shape[1]==0:
            continue

        if auc_df.shape[1]>1:
            # Several eRegulons for this TF: drop extended, '+_-' and '-_+' ones.
            unwanted = (auc_df.columns.str.contains("extended")
                        | auc_df.columns.str.contains("+_-", regex=False)
                        | auc_df.columns.str.contains("-_+", regex=False))
            auc_df = auc_df.loc[:, ~unwanted]
        # Name the column after the TF (text before the first "_").
        auc_df.columns = auc_df.columns.str.split("_").str[0]
        auc_alltfs.append(auc_df)
    # Single concat of scores + labels: also works when no TF matched in this
    # sample (pd.concat on an empty list would raise).
    labels = scplus_obj.metadata_cell["ACC_highlevel_wcancer"]
    per_sample_auc.append(pd.concat(auc_alltfs + [labels], axis=1))

# TFs without an eRegulon in a sample are NaN after pooling and are set to 0.
all_plot_dfs = pd.concat(per_sample_auc).fillna(0)

full_auc = all_plot_dfs.drop("ACC_highlevel_wcancer",axis=1).copy()
full_auc = (full_auc - full_auc.mean())/full_auc.std()
all_plot_dfs = pd.concat([full_auc,all_plot_dfs["ACC_highlevel_wcancer"]],axis=1)

all_plot_dfs = all_plot_dfs.groupby(by="ACC_highlevel_wcancer").mean()

In [None]:
# Heatmap of the current all_plot_dfs table (rows: celltype_order, columns:
# tf_order) with the colour scale fixed to [-1, 1], saved as SVG.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))
heatmap_values = all_plot_dfs.loc[celltype_order, tf_order]
sns.heatmap(
    data=heatmap_values,
    cmap="vlag",
    center=0,
    vmin=-1,
    vmax=1,
    ax=ax,
    linewidths=1,
    linecolor="grey",
    cbar_kws={"label": "eRegulon Gene-based expression, Z-score"},
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax.set_ylabel("")
fig.savefig("figures/malignant/cNMF_scenic_eRegulon_gene_based_zscore.svg", dpi=200, bbox_inches="tight")

In [None]:
# Mean z-scored gene-based eRegulon AUC per ACC_highlevel_wcancer group,
# restricted to carcinoma cells, one column per TF in all_seltfs.
per_sample_auc = []
for sample_name in all_scplus:
    scplus_obj = all_scplus[sample_name]
    cell_meta = scplus_obj.metadata_cell
    # NOTE(review): the expression cell selects carcinoma cells through
    # "ACC_highlevel_celltype"; confirm "ACC_highlevel_annotation" is the intended column.
    selcells = cell_meta[cell_meta["ACC_highlevel_annotation"]=="Carcinoma"].index
    # Look the table up once per sample instead of copying it for every TF.
    sample_auc = scplus_obj.uns["eRegulon_AUC"]["Gene_based"]
    auc_alltfs = []
    for tf in all_seltfs:
        # Anchor the match on "<TF>_": a plain substring/regex match would also
        # pick up eRegulons of other TFs whose name merely contains this one.
        auc_df = sample_auc.loc[selcells, sample_auc.columns.str.startswith(tf + "_")].copy()
        if auc_df.shape[1]==0:
            continue

        if auc_df.shape[1]>1:
            # Several eRegulons for this TF: drop extended, '+_-' and '-_+' ones.
            unwanted = (auc_df.columns.str.contains("extended")
                        | auc_df.columns.str.contains("+_-", regex=False)
                        | auc_df.columns.str.contains("-_+", regex=False))
            auc_df = auc_df.loc[:, ~unwanted]

        # Name the column after the TF (text before the first "_").
        auc_df.columns = auc_df.columns.str.split("_").str[0]
        auc_alltfs.append(auc_df)
    # Single concat of scores + labels: also works when no TF matched in this
    # sample (pd.concat on an empty list would raise).
    labels = cell_meta.loc[selcells,"ACC_highlevel_wcancer"]
    per_sample_auc.append(pd.concat(auc_alltfs + [labels], axis=1))

# TFs without an eRegulon in a sample are NaN after pooling and are set to 0.
all_plot_dfs = pd.concat(per_sample_auc).fillna(0)

full_auc = all_plot_dfs.drop("ACC_highlevel_wcancer",axis=1).copy()
full_auc = (full_auc - full_auc.mean())/full_auc.std()
all_plot_dfs = pd.concat([full_auc,all_plot_dfs["ACC_highlevel_wcancer"]],axis=1)

all_plot_dfs = all_plot_dfs.groupby(by="ACC_highlevel_wcancer").mean()

In [None]:
# Heatmap of the current all_plot_dfs table restricted to the four malignant
# states, colour scale fixed to [-1, 1].
fig, ax = plt.subplots(1,1,figsize=(5,3))
sns.heatmap(data=all_plot_dfs.loc[["cNMF_3","cNMF_1","cNMF_4","Carcinoma_undefined"],
            tf_order], cmap="vlag", center=0, vmin=-1, vmax=1, ax=ax, linewidths=1, linecolor="grey",
            cbar_kws={"label": "eRegulon Gene-based expression, Z-score"})
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax.set_ylabel("")
# Saved with a "_malonly" suffix (same convention as the TF z-score figure):
# the previous path was identical to the all-cell-type gene-based heatmap and
# overwrote it.
fig.savefig("figures/malignant/cNMF_scenic_eRegulon_gene_based_zscore_malonly.svg", dpi=200, bbox_inches="tight")

In [None]:
# Mean z-scored region-based eRegulon AUC per ACC_highlevel_wcancer group,
# one column per TF in all_seltfs, pooled over all samples.
per_sample_auc = []
for sample_name in all_scplus:
    scplus_obj = all_scplus[sample_name]
    # Look the table up once per sample instead of copying it for every TF.
    sample_auc = scplus_obj.uns["eRegulon_AUC"]["Region_based"]
    auc_alltfs = []
    for tf in all_seltfs:
        # Anchor the match on "<TF>_": a plain substring/regex match would also
        # pick up eRegulons of other TFs whose name merely contains this one.
        auc_df = sample_auc.loc[:, sample_auc.columns.str.startswith(tf + "_")].copy()
        if auc_df.shape[1]==0:
            continue

        if auc_df.shape[1]>1:
            # Several eRegulons for this TF: drop extended, '+_-' and '-_+' ones.
            unwanted = (auc_df.columns.str.contains("extended")
                        | auc_df.columns.str.contains("+_-", regex=False)
                        | auc_df.columns.str.contains("-_+", regex=False))
            auc_df = auc_df.loc[:, ~unwanted]
        # Name the column after the TF (text before the first "_").
        auc_df.columns = auc_df.columns.str.split("_").str[0]
        auc_alltfs.append(auc_df)
    # Single concat of scores + labels: also works when no TF matched in this
    # sample (pd.concat on an empty list would raise).
    labels = scplus_obj.metadata_cell["ACC_highlevel_wcancer"]
    per_sample_auc.append(pd.concat(auc_alltfs + [labels], axis=1))

# TFs without an eRegulon in a sample are NaN after pooling and are set to 0.
all_plot_dfs = pd.concat(per_sample_auc).fillna(0)

full_auc = all_plot_dfs.drop("ACC_highlevel_wcancer",axis=1).copy()
full_auc = (full_auc - full_auc.mean())/full_auc.std()
all_plot_dfs = pd.concat([full_auc,all_plot_dfs["ACC_highlevel_wcancer"]],axis=1)

all_plot_dfs = all_plot_dfs.groupby(by="ACC_highlevel_wcancer").mean()

In [None]:
# Heatmap of the current all_plot_dfs table (rows: celltype_order, columns:
# tf_order) with the colour scale fixed to [-1, 1], saved as SVG.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))
heatmap_values = all_plot_dfs.loc[celltype_order, tf_order]
sns.heatmap(
    data=heatmap_values,
    cmap="vlag",
    center=0,
    vmin=-1,
    vmax=1,
    ax=ax,
    linewidths=1,
    linecolor="grey",
    cbar_kws={"label": "eRegulon Region-based expression, Z-score"},
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax.set_ylabel("")
fig.savefig("figures/malignant/cNMF_scenic_eRegulon_region_based_zscore.svg", dpi=200, bbox_inches="tight")

In [None]:
# Mean z-scored region-based eRegulon AUC per ACC_highlevel_wcancer group,
# restricted to carcinoma cells, one column per TF in all_seltfs.
per_sample_auc = []
for sample_name in all_scplus:
    scplus_obj = all_scplus[sample_name]
    cell_meta = scplus_obj.metadata_cell
    # NOTE(review): the expression cell selects carcinoma cells through
    # "ACC_highlevel_celltype"; confirm "ACC_highlevel_annotation" is the intended column.
    selcells = cell_meta[cell_meta["ACC_highlevel_annotation"]=="Carcinoma"].index
    # Look the table up once per sample instead of copying it for every TF.
    sample_auc = scplus_obj.uns["eRegulon_AUC"]["Region_based"]
    auc_alltfs = []
    for tf in all_seltfs:
        # Anchor the match on "<TF>_": a plain substring/regex match would also
        # pick up eRegulons of other TFs whose name merely contains this one.
        auc_df = sample_auc.loc[selcells, sample_auc.columns.str.startswith(tf + "_")].copy()
        if auc_df.shape[1]==0:
            continue

        if auc_df.shape[1]>1:
            # Several eRegulons for this TF: drop extended, '+_-' and '-_+' ones.
            unwanted = (auc_df.columns.str.contains("extended")
                        | auc_df.columns.str.contains("+_-", regex=False)
                        | auc_df.columns.str.contains("-_+", regex=False))
            auc_df = auc_df.loc[:, ~unwanted]
        # Name the column after the TF (text before the first "_").
        auc_df.columns = auc_df.columns.str.split("_").str[0]
        auc_alltfs.append(auc_df)
    # Single concat of scores + labels: also works when no TF matched in this
    # sample (pd.concat on an empty list would raise).
    labels = cell_meta.loc[selcells,"ACC_highlevel_wcancer"]
    per_sample_auc.append(pd.concat(auc_alltfs + [labels], axis=1))

# TFs without an eRegulon in a sample are NaN after pooling and are set to 0.
all_plot_dfs = pd.concat(per_sample_auc).fillna(0)

full_auc = all_plot_dfs.drop("ACC_highlevel_wcancer",axis=1).copy()
full_auc = (full_auc - full_auc.mean())/full_auc.std()
all_plot_dfs = pd.concat([full_auc,all_plot_dfs["ACC_highlevel_wcancer"]],axis=1)

all_plot_dfs = all_plot_dfs.groupby(by="ACC_highlevel_wcancer").mean()

In [None]:
# Heatmap of the current all_plot_dfs table restricted to the four malignant
# states, colour scale fixed to [-1, 1].
fig, ax = plt.subplots(1,1,figsize=(5,3))
sns.heatmap(data=all_plot_dfs.loc[["cNMF_3","cNMF_1","cNMF_4","Carcinoma_undefined"],
            tf_order], cmap="vlag", center=0, vmin=-1, vmax=1, ax=ax, linewidths=1, linecolor="grey",
            cbar_kws={"label": "eRegulon Region-based expression, Z-score"})
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax.set_ylabel("")
# This is the region-based, malignant-only figure. The previous path was the
# gene-based all-cell-type file name (copy-paste), which silently overwrote
# that figure with region-based content.
fig.savefig("figures/malignant/cNMF_scenic_eRegulon_region_based_zscore_malonly.svg", dpi=200, bbox_inches="tight")

## TME results

In [None]:
# Column order of the TME heatmaps (rebinds the tf_order used for the cNMF plots).
# NOTE(review): the TME tables are built with all_seltfs_TME as columns, so
# every TF listed here must also be in all_seltfs_TME or the .loc lookup in the
# plotting cells raises KeyError; confirm after re-running the RSS selection.
tf_order = ["ERG", "ELK3", "HLX", "ETS1", "FLI1",  "RUNX3",
            "IKZF1","IKZF3","NFATC2","NFATC3",
            "VDR","IRF8","SPI1","IRF5","RBPJ","ETV5", "MAFB","MITF","MEF2C",
            "BNC2","RUNX1","RUNX2","PRRX1","NFATC4","RARB","SOX5",
            "TCF4","TCF7L1","WT1"]

In [None]:
# Row order of the TME heatmaps (rebinds celltype_order). Entries are labels of
# "refined_wcancer_red"; "TAM3" is not listed because the table-building cells
# relabel it to "TAM2" before grouping.
celltype_order = ["Angiogenic EC", "Hepatic EC", "Quiescent EC", "B", "TCD4", "TCD8", "Treg", "NK",
                  "TAM1","TAM2", "DC","Kupffer cells",
                   "Inflammatory CAF", "HGF-CAF", "Fibroblast", "Smooth muscle", "Skeletal muscle",
                  "cNMF_1","cNMF_3","cNMF_4","Carcinoma_undefined"]

In [None]:
# Mean z-scored expression of the TME TFs (all_seltfs_TME) per
# refined_wcancer_red group, with "TAM3" relabelled to "TAM2".
# Per sample: normalise counts to 10,000 per cell and log1p-transform. The
# z-score is then taken per TF over the cells of all samples pooled together.
per_sample_frames = []
for sample_name in all_scplus:
    scplus_obj = all_scplus[sample_name]
    gex_adata = sc.AnnData(scplus_obj.to_df("EXP").copy())
    sc.pp.normalize_total(adata=gex_adata, target_sum=10000)
    sc.pp.log1p(gex_adata)
    gex_df = pd.DataFrame(
        gex_adata[:, all_seltfs_TME].X.copy(),
        index=scplus_obj.cell_names,
        columns=all_seltfs_TME,
    )
    group_labels = scplus_obj.metadata_cell["refined_wcancer_red"].replace({"TAM3": "TAM2"})
    per_sample_frames.append(pd.concat([gex_df, group_labels], axis=1))
pooled = pd.concat(per_sample_frames)

# The label column is the last one; everything before it is expression.
full_gex = pooled.iloc[:, :-1].copy()
full_gex = (full_gex - full_gex.mean()) / full_gex.std()

all_plot_dfs = (
    pd.concat([full_gex, pooled.iloc[:, -1]], axis=1)
    .groupby(by="refined_wcancer_red")
    .mean()
)

In [None]:
# Heatmap of the current all_plot_dfs table (rows: celltype_order, columns:
# tf_order), colour scale fixed to [-1, 2], saved as SVG.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
heatmap_values = all_plot_dfs.loc[celltype_order, tf_order]
sns.heatmap(
    data=heatmap_values,
    cmap="vlag",
    center=0,
    vmin=-1,
    vmax=2,
    ax=ax,
    cbar_kws={"label": "TF Z-score"},
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax.set_ylabel("")
fig.savefig("figures/TME/TME_scenic_TF_zscore.svg", dpi=200, bbox_inches="tight")

In [None]:
# Fibroblast-only excerpt of the current all_plot_dfs table: three fibroblast
# groups by six TFs, colour scale fixed to [-1, 2].
fibroblast_groups = ["Inflammatory CAF","HGF-CAF","Fibroblast"]
fibroblast_tfs = ["BNC2","RUNX1","RUNX2","PRRX1","NFATC4","ETS1"]
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(3, 2))
heatmap_values = all_plot_dfs.loc[fibroblast_groups, fibroblast_tfs]
sns.heatmap(
    data=heatmap_values,
    cmap="vlag",
    center=0,
    vmin=-1,
    vmax=2,
    ax=ax,
    cbar_kws={"label": "TF Z-score"},
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax.set_ylabel("")
fig.savefig("figures/TME/TME_Fibroblasts_scenic_TF_zscore.svg", dpi=200, bbox_inches="tight")

In [None]:
# Mean z-scored gene-based eRegulon AUC per refined_wcancer_red group
# ("TAM3" relabelled to "TAM2"), one column per TF in all_seltfs_TME.
per_sample_auc = []
for sample_name in all_scplus:
    scplus_obj = all_scplus[sample_name]
    # Look the table up once per sample instead of copying it for every TF.
    sample_auc = scplus_obj.uns["eRegulon_AUC"]["Gene_based"]
    auc_alltfs = []
    for tf in all_seltfs_TME:
        # Anchor the match on "<TF>_": a plain substring/regex match would also
        # pick up eRegulons of other TFs whose name merely contains this one.
        auc_df = sample_auc.loc[:, sample_auc.columns.str.startswith(tf + "_")].copy()
        if auc_df.shape[1]==0:
            continue

        if auc_df.shape[1]>1:
            # Several eRegulons for this TF: drop extended, '+_-' and '-_+' ones.
            unwanted = (auc_df.columns.str.contains("extended")
                        | auc_df.columns.str.contains("+_-", regex=False)
                        | auc_df.columns.str.contains("-_+", regex=False))
            auc_df = auc_df.loc[:, ~unwanted]
        # Name the column after the TF (text before the first "_").
        auc_df.columns = auc_df.columns.str.split("_").str[0]
        auc_alltfs.append(auc_df)
    # Single concat of scores + labels: also works when no TF matched in this
    # sample (pd.concat on an empty list would raise).
    labels = scplus_obj.metadata_cell["refined_wcancer_red"].replace({"TAM3": "TAM2"})
    per_sample_auc.append(pd.concat(auc_alltfs + [labels], axis=1))

# TFs without an eRegulon in a sample are NaN after pooling and are set to 0.
all_plot_dfs = pd.concat(per_sample_auc).fillna(0)

full_auc = all_plot_dfs.drop("refined_wcancer_red",axis=1).copy()
full_auc = (full_auc - full_auc.mean())/full_auc.std()
all_plot_dfs = pd.concat([full_auc,all_plot_dfs["refined_wcancer_red"]],axis=1)

all_plot_dfs = all_plot_dfs.groupby(by="refined_wcancer_red").mean()

In [None]:
# Heatmap of the current all_plot_dfs table (rows: celltype_order, columns:
# tf_order), colour scale fixed to [-1, 2], saved as SVG.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
heatmap_values = all_plot_dfs.loc[celltype_order, tf_order]
sns.heatmap(
    data=heatmap_values,
    cmap="vlag",
    center=0,
    vmin=-1,
    vmax=2,
    ax=ax,
    cbar_kws={"label": "eRegulon Gene-based expression, Z-score"},
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax.set_ylabel("")
fig.savefig("figures/TME/TME_scenic_eRegulon_gene_based_zscore.svg", dpi=200, bbox_inches="tight")

In [None]:
# Fibroblast-only excerpt of the current all_plot_dfs table: three fibroblast
# groups by six TFs. Note the upper colour limit here is 3, not 2.
fibroblast_groups = ["Inflammatory CAF","HGF-CAF","Fibroblast"]
fibroblast_tfs = ["BNC2","RUNX1","RUNX2","PRRX1","NFATC4","ETS1"]
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(3, 2))
heatmap_values = all_plot_dfs.loc[fibroblast_groups, fibroblast_tfs]
sns.heatmap(
    data=heatmap_values,
    cmap="vlag",
    center=0,
    vmin=-1,
    vmax=3,
    ax=ax,
    cbar_kws={"label": "eRegulon Gene-based expression, Z-score"},
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax.set_ylabel("")
fig.savefig("figures/TME/TME_Fibroblasts_eRegulon_gene_based_zscore.svg", dpi=200, bbox_inches="tight")

In [None]:
# Mean z-scored region-based eRegulon AUC per refined_wcancer_red group
# ("TAM3" relabelled to "TAM2"), one column per TF in all_seltfs_TME.
per_sample_auc = []
for sample_name in all_scplus:
    scplus_obj = all_scplus[sample_name]
    # Look the table up once per sample instead of copying it for every TF.
    sample_auc = scplus_obj.uns["eRegulon_AUC"]["Region_based"]
    auc_alltfs = []
    for tf in all_seltfs_TME:
        # Anchor the match on "<TF>_": a plain substring/regex match would also
        # pick up eRegulons of other TFs whose name merely contains this one.
        auc_df = sample_auc.loc[:, sample_auc.columns.str.startswith(tf + "_")].copy()
        if auc_df.shape[1]==0:
            continue

        if auc_df.shape[1]>1:
            # Several eRegulons for this TF: drop extended, '+_-' and '-_+' ones.
            unwanted = (auc_df.columns.str.contains("extended")
                        | auc_df.columns.str.contains("+_-", regex=False)
                        | auc_df.columns.str.contains("-_+", regex=False))
            auc_df = auc_df.loc[:, ~unwanted]
        # Name the column after the TF (text before the first "_").
        auc_df.columns = auc_df.columns.str.split("_").str[0]
        auc_alltfs.append(auc_df)
    # Single concat of scores + labels: also works when no TF matched in this
    # sample (pd.concat on an empty list would raise).
    labels = scplus_obj.metadata_cell["refined_wcancer_red"].replace({"TAM3": "TAM2"})
    per_sample_auc.append(pd.concat(auc_alltfs + [labels], axis=1))

# TFs without an eRegulon in a sample are NaN after pooling and are set to 0.
all_plot_dfs = pd.concat(per_sample_auc).fillna(0)

full_auc = all_plot_dfs.drop("refined_wcancer_red",axis=1).copy()
full_auc = (full_auc - full_auc.mean())/full_auc.std()
all_plot_dfs = pd.concat([full_auc,all_plot_dfs["refined_wcancer_red"]],axis=1)

all_plot_dfs = all_plot_dfs.groupby(by="refined_wcancer_red").mean()

In [None]:
# Heatmap of the current all_plot_dfs table (rows: celltype_order, columns:
# tf_order), colour scale fixed to [-1, 2], saved as SVG.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
heatmap_values = all_plot_dfs.loc[celltype_order, tf_order]
sns.heatmap(
    data=heatmap_values,
    cmap="vlag",
    center=0,
    vmin=-1,
    vmax=2,
    ax=ax,
    cbar_kws={"label": "eRegulon Region-based expression, Z-score"},
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax.set_ylabel("")
fig.savefig("figures/TME/TME_scenic_eRegulon_region_based_zscore.svg", dpi=200, bbox_inches="tight")

In [None]:
# Fibroblast-only excerpt of the current all_plot_dfs table: three fibroblast
# groups by six TFs, colour scale fixed to [-1, 2].
fibroblast_groups = ["Inflammatory CAF","HGF-CAF","Fibroblast"]
fibroblast_tfs = ["BNC2","RUNX1","RUNX2","PRRX1","NFATC4","ETS1"]
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(3, 2))
heatmap_values = all_plot_dfs.loc[fibroblast_groups, fibroblast_tfs]
sns.heatmap(
    data=heatmap_values,
    cmap="vlag",
    center=0,
    vmin=-1,
    vmax=2,
    ax=ax,
    cbar_kws={"label": "eRegulon Region-based expression, Z-score"},
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax.set_ylabel("")
fig.savefig("figures/TME/TME_Fibroblasts_eRegulon_region_based_zscore.svg", dpi=200, bbox_inches="tight")