# Motivation

This is a clean version of the experiments I performed for the CN Poster and Extended abstract. Some relatable notebooks are:

* **SBM_h42_TF_network_effects** - where i am looking at selective edge pruning
* **SBM_allHelathy_gc42** - where i'm analysing the all_healthy data

This is a modified version of the notebook CN_SBM_allHealty.ipynb from iNET repository.

# Init

In [46]:
%load_ext autoreload
import pandas as pd
import numpy as np
import os
import sys

import plotly.express as px
import plotly.io as pio


import multiprocess as mp


# own libraries
sys.path.append('/Users/vlad/Developer/York/iNet_v2/src/')

from NetworkAnalysis.ExperimentSet import ExperimentSet
from NetworkAnalysis.NetworkOutput import NetworkOutput

from NetworkAnalysis import GraphHelper as gh
from NetworkAnalysis.utilities import sankey_consensus_plot as sky
from NetworkAnalysis.utilities.helpers import save_fig
from NetworkAnalysis.utilities.helpers import save_fig, survival_plot, survival_comp
from NetworkAnalysis.GraphToolExp import GraphToolExperiment as GtExp
sys.path.append(os.path.dirname("../../src"))

# Gsea libraries

import graph_tool.all as gt

# matplotlib.use("gtk3agg")

# %matplotlib inline
pio.renderers.default = "plotly_mimetype+notebook"
pio.templates.default = "ggplot2"

pool = mp.Pool(mp.cpu_count())

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [47]:
results_path = "../../results/exp/"
data_base = "../../data/"
base_path = "../../results/"

exp_folder_tumour = "network_I/gc_42/tum/"  # "/integration_v2.1/tum/" - path from iNET
exp_folder_h47_ctrl = '../results/network_I/gc_42/tum_ctrls/'

figures_path = "gc42_ctrls_tum/"

vu_output = pd.read_csv(f"{data_base}/metadata/VU_clustering_v3.tsv", sep="\t", index_col="Sample")

tcga_mutations_df = pd.read_csv(f"{data_base}/tumour/mutations_tcga.csv")
tcga_mutations_df = tcga_mutations_df[tcga_mutations_df["count"] != 0].set_index("gene")

all_tum_tpms = pd.read_csv(f"{data_base}/tumour/TPMs_selected_genes_v3_13k_gc42.tsv", sep="\t", index_col="genes")

# tf list
tf_path = f"{data_base}/metadata/TF_names_v_1.01.txt"
if os.path.exists(tf_path):
    tf_list = np.genfromtxt(fname=tf_path, delimiter="\t", skip_header=1, dtype="str")

## Load experiment sets

In [48]:
%autoreload 2
tum = ExperimentSet("tum", base_path, exp_folder_tumour, tcga_mutations_df, sel_sets = ["5K"], rel_path="../", exp_type='iNet')

# Load the objects too
t_exps, t_entropy = GtExp.load_hsbm_exps(tum)
tum.exps = t_exps
t_entropy["Type"] = "Experiment"

##### Experiment labels:  dict_keys(['standard_5K_12TF_hsbm', 'standard_5K_11TF_hsbm', 'standard_5K_4TF_hsbm', 'standard_5K_9TF_hsbm', 'standard_5K_10TF_hsbm', 'standard_5K_5TF_hsbm', 'standard_5K_3TF_hsbm', 'standard_5K_8TF_hsbm', 'standard_5K_7TF_hsbm', 'standard_5K_6TF_hsbm'])
Loading Graph-Tool for standard_5K_12TF_hsbm
Loading Graph-Tool for standard_5K_11TF_hsbm
Loading Graph-Tool for standard_5K_4TF_hsbm
Loading Graph-Tool for standard_5K_9TF_hsbm
Loading Graph-Tool for standard_5K_10TF_hsbm
Loading Graph-Tool for standard_5K_5TF_hsbm
Loading Graph-Tool for standard_5K_3TF_hsbm
Loading Graph-Tool for standard_5K_8TF_hsbm
Loading Graph-Tool for standard_5K_7TF_hsbm
Loading Graph-Tool for standard_5K_6TF_hsbm


In [49]:
folders = next(os.walk(f'{base_path}/{exp_folder_h47_ctrl}'), (None, None, []))[1]
exp_ctrls = {}

args, t_ctrls = [], {}
for folder in folders:
    ctrl_path = "{}/{}/".format(exp_folder_h47_ctrl, folder)
    idx = int(folder.split("_")[-1])
    t_ctrls[idx] = ExperimentSet("tumCtrl", base_path, ctrl_path, tcga_mutations_df, sel_sets=["5K"], rel_path="../", exp_type='iNet')

##### Experiment labels:  dict_keys(['standard_5K_3TF_hsbm', 'standard_5K_4TF_hsbm', 'standard_5K_5TF_hsbm', 'standard_5K_9TF_hsbm', 'standard_5K_7TF_hsbm', 'standard_5K_11TF_hsbm', 'standard_5K_6TF_hsbm', 'standard_5K_10TF_hsbm', 'standard_5K_8TF_hsbm', 'standard_5K_12TF_hsbm'])
##### Experiment labels:  dict_keys(['standard_5K_9TF_hsbm', 'standard_5K_11TF_hsbm', 'standard_5K_7TF_hsbm', 'standard_5K_5TF_hsbm', 'standard_5K_3TF_hsbm', 'standard_5K_6TF_hsbm', 'standard_5K_10TF_hsbm', 'standard_5K_8TF_hsbm', 'standard_5K_12TF_hsbm', 'standard_5K_4TF_hsbm'])
##### Experiment labels:  dict_keys(['standard_5K_9TF_hsbm', 'standard_5K_11TF_hsbm', 'standard_5K_7TF_hsbm', 'standard_5K_5TF_hsbm', 'standard_5K_3TF_hsbm', 'standard_5K_8TF_hsbm', 'standard_5K_12TF_hsbm', 'standard_5K_6TF_hsbm', 'standard_5K_10TF_hsbm', 'standard_5K_4TF_hsbm'])
##### Experiment labels:  dict_keys(['standard_5K_3TF_hsbm', 'standard_5K_9TF_hsbm', 'standard_5K_5TF_hsbm', 'standard_5K_11TF_hsbm', 'standard_5K_7TF_hsbm',

In [50]:
ctrl_exps, cmb_df = {}, pd.DataFrame()
for key in range(1, 11, 1):
    exps, entropy = GtExp.load_hsbm_exps(t_ctrls[key])
    entropy["Type"] = f"tCtrl{key}"
    cmb_df = pd.concat([cmb_df, entropy], axis=0)
    ctrl_exps[key] = {"entropy": entropy, "exps": exps}

cmb_df = pd.concat([cmb_df, t_entropy], axis=0)
cmb_df["Entropy_norm"] = (cmb_df["Entropy"] - cmb_df["Entropy"].min()) / (cmb_df["Entropy"].max() - cmb_df["Entropy"].min())
cmb_df["Entropy_log10"] = np.log10(cmb_df["Entropy"])

Loading Graph-Tool for standard_5K_7TF_hsbm
Loading Graph-Tool for standard_5K_9TF_hsbm
Loading Graph-Tool for standard_5K_11TF_hsbm
Loading Graph-Tool for standard_5K_3TF_hsbm
Loading Graph-Tool for standard_5K_5TF_hsbm
Loading Graph-Tool for standard_5K_10TF_hsbm
Loading Graph-Tool for standard_5K_8TF_hsbm
Loading Graph-Tool for standard_5K_12TF_hsbm
Loading Graph-Tool for standard_5K_6TF_hsbm
Loading Graph-Tool for standard_5K_4TF_hsbm
Loading Graph-Tool for standard_5K_9TF_hsbm
Loading Graph-Tool for standard_5K_11TF_hsbm
Loading Graph-Tool for standard_5K_7TF_hsbm
Loading Graph-Tool for standard_5K_5TF_hsbm
Loading Graph-Tool for standard_5K_3TF_hsbm
Loading Graph-Tool for standard_5K_8TF_hsbm
Loading Graph-Tool for standard_5K_12TF_hsbm
Loading Graph-Tool for standard_5K_6TF_hsbm
Loading Graph-Tool for standard_5K_10TF_hsbm
Loading Graph-Tool for standard_5K_4TF_hsbm
Loading Graph-Tool for standard_5K_9TF_hsbm
Loading Graph-Tool for standard_5K_11TF_hsbm
Loading Graph-Tool for st

# Selective edge pruning

In [51]:
#### Find the network with top model
top_exp, color_map = [], {}
for exp in cmb_df["Type"].unique():
    sel_df = cmb_df[cmb_df["Type"] == exp]
    for tf in sel_df["TF"].unique():
        tst_df = sel_df[sel_df["TF"] == tf].sort_values(by="Entropy", ascending=False).values[0]
        top_exp.append(tst_df)

    color_map[exp] = px.colors.qualitative.Plotly[0]
    if exp == "Experiment":
        color_map[exp] = px.colors.qualitative.Plotly[2]

top_exp = pd.DataFrame(top_exp, columns=["Entropy", "TF", "Type", "Entropy_norm", "Entropy_log"])

#### Compute stats
stats_vals = []
for tf in top_exp["TF"].unique():
    sel_df = top_exp[top_exp["TF"] == tf]
    sel_df_ctrl = sel_df[sel_df["Type"].str.contains("tCtrl")]["Entropy"]
    sel_df_exp = sel_df[~sel_df["Type"].str.contains("tCtrl")]["Entropy"].values[0]
    stats_vals.append((sel_df_ctrl.median(), sel_df_ctrl.mean(), sel_df_ctrl.std(), tf, "Control"))
    stats_vals.append((sel_df_exp, sel_df_exp, 0, tf, "Experiment"))

In [74]:
top_df = pd.DataFrame(stats_vals, columns=["Median_Ent", "Mean_Ent", "Std_Ent", "TF", "Type"])

color_map = {"Experiment": px.colors.qualitative.G10[0], "Control": px.colors.qualitative.G10[1]}
fig = px.scatter(
    top_df,
    y="Mean_Ent",
    x="TF",
    error_y="Std_Ent",
    color="Type",
    title="Entropy evolution TFs 3-15. Standard 5K",
    height=600,
    color_discrete_map=color_map,
)

fig.update_layout(xaxis=dict(tickmode="array", tickvals=list(range(3, 16, 1))))
fig.update_layout(
    legend=dict(
        orientation="h",
        title="SBM",
        yanchor="bottom",
        y=0.87,
        xanchor="center",
        x=0.5,
        bgcolor="rgba(0,0,0,0)",
        font=dict(size=16, color="#003366"),
    ),
    title="",
    xaxis=dict(
        tickfont=dict(size=16),
        title="Minimum degree for selected genes",
    ),
    yaxis=dict(
        tickfont=dict(size=16),
        title="Entropy",
    ),
    font=dict(size=16),
)
fig.update_traces(marker_size=8)
fig.update_traces(error_y=dict(width=8), selector=({"name": "Control"}))
fig.show()
save_fig(name="sbm_ent_sel_prun", fig=fig, base_path=figures_path, width=1200, height=600)

# Leiden and SBM comparisons 

## Modularity Score

In [53]:
def prep_leiden(exp: ExperimentSet, label="Experiment"):
    leid_stats = exp.comb_leiden_scores()
    leid_stats = leid_stats.loc[leid_stats["Modifier"] == "Standard"]
    leid_stats["Type"] = label

    sel_cols = ["Leiden Rank", "ModularityScore", "ModuleNum", "AvgModSize", "Modifier", "Type", "TF", "AvgModuleNum"]

    return leid_stats[sel_cols]


def tf_stats(ctrls_df: pd.DataFrame, exp_df: pd.DataFrame):
    for tf in ctrls_df["TF"].unique():
        sel_df = ctrls_df[ctrls_df["TF"] == tf]
        sel_df_exp = exp_df[exp_df["TF"] == tf]

        r_field = {"ModularityScore": "Mod", "AvgModuleNum": "ModNum", "AvgModSize": "ModSize"}
        for field in ["ModularityScore", "AvgModuleNum", "AvgModSize"]:
            ctrls_df.loc[ctrls_df["TF"] == tf, "{}_median".format(r_field[field])] = sel_df[field].median(axis=0)
            ctrls_df.loc[ctrls_df["TF"] == tf, "{}_mean".format(r_field[field])] = sel_df[field].mean(axis=0)
            ctrls_df.loc[ctrls_df["TF"] == tf, "{}_std".format(r_field[field])] = sel_df[field].std(axis=0)

            exp_df.loc[exp_df["TF"] == tf, "{}_median".format(r_field[field])] = sel_df_exp[field].median(axis=0)
            exp_df.loc[exp_df["TF"] == tf, "{}_mean".format(r_field[field])] = sel_df_exp[field].mean(axis=0)
            exp_df.loc[exp_df["TF"] == tf, "{}_std".format(r_field[field])] = sel_df_exp[field].std(axis=0)

    ctrls_df["Type"] = "Control"
    comb_leiden = pd.concat([ctrls_df, exp_df], axis=0)
    comb_leiden["TF"] = comb_leiden["TF"].astype(int)
    comb_leiden = comb_leiden[comb_leiden["TF"] <= 15]
    comb_leiden.sort_values(by="TF", ascending=True, inplace=True)
    comb_leiden["TF"] = comb_leiden["TF"].astype(str)

    return comb_leiden

In [55]:
leid_h = prep_leiden(tum, label="Experiment")

leid_ctrls = []
for idx in range(1, 11, 1):
    df = prep_leiden(t_ctrls[idx], label="tCtrl{}".format(idx))
    leid_ctrls.append(df)

all_leid_ctrls = pd.concat(leid_ctrls, axis=0)
top_leid_ctrls = all_leid_ctrls[all_leid_ctrls["Leiden Rank"] == 0]

In [76]:
top_comb_leiden = tf_stats(top_leid_ctrls, leid_h[leid_h["Leiden Rank"] == 0])
fig = px.line(
    top_comb_leiden,
    x="TF",
    y="Mod_mean",
    error_y="Mod_std",
    markers=True,
    color="Type",
    height=600,
    title="Modularity scores for the best top Leiden Run.",
    color_discrete_map=color_map,
)
fig.update_layout(
    legend=dict(
        orientation="h",
        title="Leiden",
        yanchor="bottom",
        y=0.87,
        xanchor="center",
        x=0.5,
        bgcolor="rgba(0,0,0,0)",
        font=dict(size=16, color="#003366"),
    ),
    title="",
    xaxis=dict(
        tickfont=dict(size=16),
        title="Minimum degree for selected genes",
    ),
    yaxis=dict(
        tickfont=dict(size=16),
        title="Modularity Score",
    ),
    font=dict(size=16),
)
fig.update_traces(line_width=5, marker_size=12, error_y=dict(width=5))

fig.show()
save_fig(name="leid_mod_sel_prun", fig=fig, base_path=figures_path, width=1200, height=600)

In [57]:
# top_leid_ctrls["TF"] = top_leid_ctrls["TF"].astype(int)
# top_leid_ctrls.sort_values(by="TF", ascending=True)
# x = list(top_leid_ctrls["TF"].astype(int))
# y_upper = list(top_leid_ctrls["Mod_mean"] + top_leid_ctrls["Mod_std"])
# y_lower = list(top_leid_ctrls["Mod_mean"] - top_leid_ctrls["Mod_std"])


# fig.add_trace(
#     go.Scatter(
#         x=x + x[::-1],  # x, then x reversed
#         y=y_upper + y_lower[::-1],  # upper, then lower reversed
#         fill="toself",
#         fillcolor="rgba(0,100,80,0.2)",
#         line=dict(color="rgba(255,255,255,0)"),
#         hoverinfo="skip",
#         showlegend=False,
#     )
# )

# fig.show()

In [77]:
leid_fig = px.line(
    top_comb_leiden,
    x="TF",
    y="ModNum_mean",
    error_y="ModNum_std",
    markers=True,
    color="Type",
    height=600,
    title="Community sizes for the best top Leiden Run.",
    color_discrete_map=color_map,
)
leid_fig.update_layout(
    legend=dict(
        orientation="h",
        title="Leiden",
        yanchor="bottom",
        y=0.10,
        xanchor="center",
        x=0.15,
        bgcolor="rgba(0,0,0,0)",
        font=dict(size=16, color="#003366"),
    ),
    title="",
    xaxis=dict(
        tickfont=dict(size=16),
        title="Minimum degree for selected genes",
    ),
    yaxis=dict(
        tickfont=dict(size=16),
        tickmode="linear",
        title="Number of communities",
    ),
    font=dict(size=16),
)
leid_fig.update_traces(line_width=5, marker_size=12, error_y=dict(width=5))
leid_fig.show()
save_fig(name="leid_comNum_sel_prun", fig=leid_fig, base_path=figures_path, width=1400, height=700)

## SBM - Community size

In [59]:
ctrl_sizes, h_sizes = [], []
for key, val in t_exps.items():
    results_df, _ = val.hsbm_get_gt_df()
    tf = NetworkOutput.extract_tf_number(key)
    h_sizes.append(((tf, "Experiment", len(results_df["max_b"].unique()))))
    for idx in range(1, 10, 1):
        control_res, _ = ctrl_exps[idx]["exps"][key].hsbm_get_gt_df()
        ctrl_sizes.append(((tf, "hCtrl{}".format(idx), len(control_res["max_b"].unique()))))

ctrl_sizes = pd.DataFrame(ctrl_sizes, columns=["TF", "Type", "Com_size"])
ctrl_sizes["Type"] = "Control"
h_sizes = pd.DataFrame(h_sizes, columns=["TF", "Type", "Com_size"])

In [60]:
for tf in ctrl_sizes["TF"].unique():
    sel_df = ctrl_sizes[(ctrl_sizes["TF"] == tf)]
    sel_df_exp = h_sizes[h_sizes["TF"] == tf]

    ctrl_sizes.loc[ctrl_sizes["TF"] == tf, "Size_median"] = sel_df["Com_size"].median(axis=0)
    ctrl_sizes.loc[ctrl_sizes["TF"] == tf, "Size_mean"] = sel_df["Com_size"].mean(axis=0)
    ctrl_sizes.loc[ctrl_sizes["TF"] == tf, "Size_std"] = sel_df["Com_size"].std(axis=0)

    h_sizes.loc[h_sizes["TF"] == tf, "Size_median"] = sel_df_exp["Com_size"].median(axis=0)
    h_sizes.loc[h_sizes["TF"] == tf, "Size_mean"] = sel_df_exp["Com_size"].mean(axis=0)
    h_sizes.loc[h_sizes["TF"] == tf, "Size_std"] = 0

comb_df = pd.concat([ctrl_sizes, h_sizes], axis=0)

In [78]:
if True:
    comb_df.sort_values('TF', inplace=True)
    sbm_fig = px.line(
        comb_df,
        x="TF",
        y="Size_mean",
        color="Type",
        error_y="Size_std",
        markers=True,
        title="SBM. Community sizes",
        height=600,
        color_discrete_map=color_map,
    )
    sbm_fig.update_layout(
        legend=dict(
            orientation="h",
            title="Type",
            yanchor="bottom",
            xanchor="center",
            y=0.8,
            x=0.15,
            bgcolor="rgba(0,0,0,0)",
            font=dict(size=16, color="#003366"),
        ),
        title="",
        xaxis=dict(
            tickfont=dict(size=16),
            title="Minimum degree for selected genes",
        ),
        yaxis=dict(
            tickfont=dict(size=16),
            tickmode="linear",
            # tick0=3,
            # dtick=1,
            title="Number of communities",
        ),
        font=dict(size=16),
    )
    sbm_fig.update_traces(line_width=5, marker_size=12, error_y=dict(width=5))
    sbm_fig.show()
    save_fig(name="sbm_comNum_sel_prun", fig=sbm_fig, base_path=figures_path, width=1200, height=600)

### SBM + Leiden Community size

In [79]:
sbm_fig = sbm_fig.add_traces(leid_fig.data)
sbm_fig = sbm_fig.update_layout(
    legend=dict(
        orientation="h",
        title="Type",
        yanchor="bottom",
        y=0.4,
        xanchor="center",
        x=0.5,
        bgcolor="rgba(0,0,0,0)",
        font=dict(size=16, color="#003366"),
    ),
    xaxis=dict(
        tickfont=dict(size=16),
        title="Minimum degree for selected genes",
    ),
    title="",
    yaxis=dict(
        tickmode="linear",
        tick0=10,
        dtick=2,
    ),
    showlegend=True,
    font=dict(size=16),
)
sbm_fig = sbm_fig.update_traces(line_width=5, marker_size=12, error_y=dict(width=5))
legend_shown = {}
for trace in sbm_fig.data:
    if trace.name in legend_shown:
        trace.showlegend = False
    else:
        trace.showlegend = True
        legend_shown[trace.name] = True

sbm_fig.add_annotation(x=2.5, y=46, text="SBM", showarrow=False, font=dict(size=16, color="#003366"))
sbm_fig.add_annotation(x=2.5, y=24, text="Leiden", showarrow=False, font=dict(size=16, color="#003366"))

sbm_fig.show()
save_fig(name="sbm_Leiden_combNum", fig=sbm_fig, base_path=figures_path, width=1200, height=600)

# TF representation

In [63]:
def gene_sel_by_mev(exp, exp_label="", show_figs=True, tf_list=[]):
    all_exps = []
    for num_genes in range(25, 201, 25):
        exp_genes = []
        for modCon in exp.gt_modCon.values():
            top_100 = modCon.sort_values(by=["ModCon_{}_gt".format(exp.type)], ascending=False).index.values[:num_genes]
            exp_genes.extend(top_100)

        # add to the global exps
        sel_df = tcga_mutations_df[tcga_mutations_df.index.isin(exp_genes)]
        mut_0 = sel_df[sel_df["count"] > 0].shape[0]
        mut_5 = sel_df[sel_df["count"] >= 5].shape[0]
        mut_10 = sel_df[sel_df["count"] >= 10].shape[0]

        # tf exploration
        tf_used = set(exp_genes) & set(tf_list)
        tf_used_mut = sel_df[sel_df["count"] > 0]
        tf_used_mut = tf_used_mut[tf_used_mut.index.isin(list(tf_used))].shape[0]

        all_exps.append((len(exp_genes), mut_0, mut_5, mut_10, len(tf_used), tf_used_mut, str(num_genes)))

    genes_stats = pd.DataFrame(all_exps, columns=["#Genes", "#Mutated >0", "#Mutated >5", "#Mutated >10", "#TF", "#Mutated TF", "Exp"])

    metrics_cols = genes_stats.columns[:-1]
    remap_cols = {col: col.replace("#", "%") for col in metrics_cols}
    prct_df = pd.concat(
        [genes_stats["Exp"], genes_stats[metrics_cols].div(genes_stats["#Genes"].values, axis=0).multiply(100).rename(columns=remap_cols)], axis=1
    )

    if show_figs:
        # Proportion
        fig = px.bar(
            prct_df,
            x="Exp",
            y=list(remap_cols.values()),
            barmode="group",
            title="{}. Proportion of genes included when different #num selected by ModCon".format(exp_label),
            height=600,
            text_auto=True,
        )
        fig.show()

        # Numbers
        fig = px.bar(
            genes_stats,
            x="Exp",
            y=genes_stats.columns[:-1],
            barmode="group",
            title="{}. Number of genes when different #num selected by ModCon".format(exp_label),
            height=600,
        )
        fig.show()
    return genes_stats, prct_df, exp_genes


def tf_modCon_exps(exps, tf_range=None, tf_list=[]):
    tf_changes = pd.DataFrame()
    if tf_range is None:
        tf_range = range(3, 10, 1)
    for tf in tf_range:
        sel_exp = exps[tf]
        genes_used, _, _ = gene_sel_by_mev(sel_exp, exp_label=tf, show_figs=False, tf_list=tf_list)

        remap_cols = {col: "{}TF_{}".format(tf, col) for col in ["#TF", "#Mutated TF", "Exp"]}
        dmy = genes_used[list(remap_cols.keys())].copy(deep=True)
        dmy["TF_edges"] = "{}".format(tf)
        tf_changes = pd.concat(
            [tf_changes, dmy],
            axis=0,
        )

    return tf_changes


# This is more for development
def plot_single_ctrl(ctrl_exps: dict, key=1, tf_list=[], exp_tf=[]):
    #### This works for only one Control select the ModCon - 100

    tf_changes_exp = tf_modCon_exps(t_exps, tf_range=list(list(range(3, 10, 1))), tf_list=tf_list)
    tf_changes_control = tf_modCon_exps(ctrl_exps[key]["exps"], tf_range=list(list(range(3, 16, 1))), tf_list=tf_list)

    tf_changes_exp["Type"] = "Experiment"
    tf_changes_control["Type"] = "Control"

    dmy_df = pd.concat([tf_changes_exp, tf_changes_control], axis=0)
    dmy_df = dmy_df[dmy_df["Exp"] == "100"]
    dmy_df["Used_prct"] = dmy_df["#TF"] / len(exp_tf) * 100

    fig = px.line(dmy_df, x="TF_edges", y="Used_prct", color="Type", markers=True, title="% TF used in calculating the MEV", height=600)
    fig.update_layout(
        legend=dict(
            orientation="h",
            # title="Type",
            yanchor="bottom",
            y=0.18,
            xanchor="center",
            x=0.5,
            bgcolor="rgba(0,0,0,0)",
            font=dict(size=12),
        ),
    )
    fig.update_xaxes(title_text="Minimum degree for selected genes", tickfont=dict(size=14), title_font=dict(size=16))
    fig = fig.update_yaxes(title_text="% of genes included", range=[0, 105], tickfont=dict(size=14), title_font=dict(size=16))
    fig.show()


def worker(arg):
    obj, methname = arg[:2]
    _ = getattr(obj, methname)()
    return obj


# Need it to asign the type of sbm method applied
for key, exp in t_exps.items():
    exp.sbm_method = "hsbm"

for key in ctrl_exps.keys():
    for _, exp in ctrl_exps[key]["exps"].items():
        exp.sbm_method = "hsbm"

In [64]:
results = pool.map(worker, ((exp, "get_ModCon") for exp in t_exps.values()))
t_exps = {NetworkOutput.extract_tf_number(exp.name): exp for exp in results}

for key, exp in t_exps.items():
    sort_col = "ModCon_{}_gt".format(exp.type)
    exp.mevsMut, _ = exp.get_mevs(tpms=all_tum_tpms, modCon=exp.gt_modCon, sort_col=sort_col, num_genes=100, verbose=False)

In [65]:
# Healthy control for ModCon and MEV
### ModCon
for key in ctrl_exps.keys():
    print("### ModCon Control {}".format(key))
    results = pool.map(worker, ((exp, "get_ModCon") for exp in ctrl_exps[key]["exps"].values()))
    ctrl_exps[key]["exps"] = {NetworkOutput.extract_tf_number(exp.type): exp for exp in results}

### MEV
for key in ctrl_exps.keys():
    print("### MEV Control {}".format(key))
    for key, exp in ctrl_exps[key]["exps"].items():
        sort_col = "ModCon_{}_gt".format(exp.type)
        exp.mevsMut, _ = exp.get_mevs(tpms=all_tum_tpms, modCon=exp.gt_modCon, sort_col=sort_col, num_genes=100, verbose=False)

### ModCon Control 1
### ModCon Control 2
### ModCon Control 3
### ModCon Control 4
### ModCon Control 5
### ModCon Control 6
### ModCon Control 7
### ModCon Control 8
### ModCon Control 9
### ModCon Control 10
### MEV Control 1
### MEV Control 2
### MEV Control 3
### MEV Control 4
### MEV Control 5
### MEV Control 6
### MEV Control 7
### MEV Control 8
### MEV Control 9
### MEV Control 10


In [95]:
# Control TF changes
used_tf = t_exps[3].tf_list

tf_chgs_all_ctrls = []
tf_range = list(list(range(3, 13, 1)))
for key, val in ctrl_exps.items():
    tf_chgs = tf_modCon_exps(val["exps"], tf_range=tf_range, tf_list=tf_list)
    tf_chgs = tf_chgs[tf_chgs["Exp"] == "100"]
    tf_chgs["Type"] = "hCtrl{}".format(key)
    tf_chgs["Used_prct"] = tf_chgs["#TF"] / len(used_tf) * 100
    # tf_chgs["Mut_prct"] = tf_chgs["#Mutated TF"] / len(used_tf) * 100 #used_tf which are mutated
    tf_chgs_all_ctrls.append(tf_chgs)

tf_chgs_all_ctrls = pd.concat(tf_chgs_all_ctrls, axis=0)

# Generate the Exp Changes too
tf_changes_exp = tf_modCon_exps(t_exps, tf_range=tf_range, tf_list=tf_list)

In [102]:
sel_df

Unnamed: 0,#TF,#Mutated TF,Exp,TF_edges,Type,Used_prct
3,233,216,100,9,hCtrl1,14.215985
3,247,233,100,9,hCtrl2,15.070165
3,238,222,100,9,hCtrl3,14.521049
3,249,233,100,9,hCtrl4,15.19219
3,257,240,100,9,hCtrl5,15.680293
3,240,224,100,9,hCtrl6,14.643075
3,240,226,100,9,hCtrl7,14.643075
3,249,234,100,9,hCtrl8,15.19219
3,255,237,100,9,hCtrl9,15.558267
3,245,231,100,9,hCtrl10,14.948139


In [84]:
# Process the information and get some stats

## Control
all_tf_chgs_stats = []
for tf in tf_chgs_all_ctrls["TF_edges"].unique():
    sel_df = tf_chgs_all_ctrls[tf_chgs_all_ctrls["TF_edges"] == tf]
    all_tf_chgs_stats.append(
        [
            tf,
            sel_df["Exp"].values[0],
            sel_df["Used_prct"].median(axis=0),
            sel_df["Used_prct"].mean(axis=0),
            sel_df["Used_prct"].std(axis=0),
            sel_df["#TF"].median(axis=0),
            sel_df["#TF"].mean(axis=0),
            sel_df["#TF"].std(axis=0),
            sel_df["#Mutated TF"].median(axis=0),
            sel_df["#Mutated TF"].mean(axis=0),
            sel_df["#Mutated TF"].std(axis=0),
        ]
    )


tf_chgs_stats_df = pd.DataFrame(
    all_tf_chgs_stats,
    columns=["TF_edges", "Exp", "Prct_median", "Prct_mean", "Prct_std", "#TF_median", "#TF_mean", "#TF_std", "#Mutated median", "Mutated mean", "Mutated std"],
)
tf_chgs_stats_df["Type"] = "Control"

## Experiment
tf_changes_exp = tf_changes_exp[tf_changes_exp["Exp"] == "100"]
tf_changes_exp["Used_prct"] = tf_changes_exp["#TF"] / len(used_tf) * 100
tf_changes_exp["Prct_median"] = tf_changes_exp["Used_prct"]
tf_changes_exp["Prct_mean"] = tf_changes_exp["Used_prct"]
tf_changes_exp["Prct_std"] = 0
tf_changes_exp["Type"] = "Experiment"
# tf_changes_exp["Mut_prct"] = tf_changes_exp["#Mutated TF"] / len(used_tf) * 100

## Combine the two
comb_chgs = pd.concat([tf_chgs_stats_df, tf_changes_exp], axis=0)

In [83]:
fig = px.line(
    comb_chgs,
    x="TF_edges",
    y="Prct_mean",
    error_y="Prct_std",
    color="Type",
    color_discrete_map=color_map,
    markers=True,
    title="% TF used in calculating the MEV",
    height=600,
)
fig.update_layout(
    legend=dict(
        orientation="h",
        # title="Type",
        yanchor="bottom",
        y=0.18,
        xanchor="center",
        x=0.5,
        bgcolor="rgba(0,0,0,0)",
        font=dict(size=16),
    ),
    title="",
    xaxis=dict(
        tickfont=dict(size=16),
        title_font=dict(
            size=16,
        ),
        title="Minimum degree for selected genes",
    ),
    yaxis=dict(
        tickfont=dict(size=16),
        title_font=dict(
            size=16,
        ),
        title="% of genes included",
        range=[0, 25],
    ),
    font=dict(size=16),
)
# fig = fig.update_yaxes(title_text="% of genes included", range=[0, 105], tickfont=dict(size=14), title_font=dict(size=16))
fig.update_traces(line_width=5, marker_size=12, error_y=dict(width=5))
save_fig(name="ctrls_min_dig_mev", fig=fig, base_path=figures_path, width=1200, height=600)
fig.show()

In [69]:
# for development
if False:
    for key in range(1, 3, 1):
        display(plot_single_ctrl(ctrl_exps, key=key), tf_list=tf_list)

## Find common genes in TFs

In [70]:
num_genes = 100
all_ctrls = {}
for ctrl_exp in ctrl_exps.values():
    all_exps = {}
    for exp in ctrl_exp["exps"].values():
        exp_genes = []
        for modCon in exp.gt_modCon.values():
            top_100 = modCon.sort_values(by=["ModCon_{}_gt".format(exp.type)], ascending=False).index.values[:num_genes]
            exp_genes.extend(top_100)

        # tf exploration
        tf_used = set(exp_genes) & set(tf_list)
        if all_exps:
            all_exps = tf_used & all_exps
        else:
            all_exps = tf_used

    if all_ctrls:
        all_ctrls = all_exps & all_ctrls
    else:
        all_ctrls = all_exps

# To save the list
if True:
    dmy_df = pd.DataFrame(index=list(all_ctrls))
    dmy_df["mut_count"] = tcga_mutations_df["count"]
    sel_tum_df = all_tum_tpms.loc[all_tum_tpms.index.isin(list(all_ctrls))]
    sel_h_df = exp.tpm_df.loc[exp.tpm_df.index.isin(list(all_ctrls))]

    dmy_df["tum_median_expression"] = sel_tum_df.median(axis=1)
    dmy_df["healthy_median_expression"] = sel_h_df.median(axis=1)

    dmy_df["tum_mean_expression"] = sel_tum_df.mean(axis=1)
    dmy_df["healthy_mean_expression"] = sel_h_df.mean(axis=1)

    dmy_df["tum_std_expression"] = sel_tum_df.std(axis=1)
    dmy_df["healthy_std_expression"] = sel_h_df.std(axis=1)

    # Some interesting info
    print("TFs mutated but not expressed: ")
    display(dmy_df[dmy_df["tum_median_expression"].isna()])

    dmy_df.fillna(0, inplace=True)
    dmy_df.index.names = ["gene"]
    dmy_df.to_csv(data_base + "gc42_tum_tf_ctrl_1.tsv", sep='\t')

TFs mutated but not expressed: 


Unnamed: 0,mut_count,tum_median_expression,healthy_median_expression,tum_mean_expression,healthy_mean_expression,tum_std_expression,healthy_std_expression


## Export network stats

In [71]:
import pickle

# Define a function to process each experiment
def process_experiment(exp, tcga_mutations_df, exps_path):
    exp.hsbm_add_vp(mut_df=tcga_mutations_df)
    exp.graph = exp.gt_g
    network_stats = exp.compute_graph_stats()
    network_stats["TF"] = exp.nodes_df["TF"]
    network_stats["mut_count"] = exp.nodes_df["count"]
    network_stats["max_b"] = exp.com_df["max_b"]

    network_stats.index.names = ["gene"]
    network_stats.to_csv(f"{exps_path}/Stats/networkStats_{exp.name}.tsv", sep="\t")

# Function to run multiprocessing
def run_multiprocessing(ctrl_exps, tcga_mutations_df):
    # Create a list of tasks
    tasks = []
    for ctrl_exp in ctrl_exps.values():
        for exp in ctrl_exp["exps"].values():
            tasks.append((exp, tcga_mutations_df, exp.exps_path))

    # Create a pool of workers
    pool = mp.Pool(processes=mp.cpu_count())  # You can specify the number of processes here

    # Map tasks to the worker function
    pool.starmap(process_experiment, tasks)

    # Close the pool and wait for work to complete
    pool.close()
    pool.join()


def net_stats_exp_set(exps: dict):

    for exp in exps.values():
        print(exp.name)
        exp.hsbm_add_vp(mut_df=tcga_mutations_df)
        com_df, _ = ctrl_exps[idx]["exps"][key].hsbm_get_gt_df()
        exp.graph = exp.gt_g
        network_stats: pd.DataFrame = exp.compute_graph_stats()
        network_stats["TF"] = exp.nodes_df["TF"]
        network_stats["mut_count"] = exp.nodes_df["count"]
        network_stats["max_b"] = com_df["max_b"]
        network_stats.index.names = ["gene"]

        # add modCon Rank
        for com, modCon in exp.gt_modCon.items():
            network_stats.loc[network_stats.index.isin(modCon.index), "ModCon_Rank"] = modCon[f"ModCon_{exp.type}_gt"].rank(ascending=False)

        # Load the actual TFs
        exp_stats_path = f"{exp.exps_path}/Stats/{exp.name}.pickle"
        network_stats["marked_as_tf"] = 0 # Some older version don't have a tf_list file attached and we have to account for that, that's for the healthyExp
        if os.path.isfile(exp_stats_path):
            with open(exp_stats_path, "rb") as handle:
                exp_stats = pickle.load(handle)

                if "tf_list" in exp_stats:
                    marked_genes = exp_stats["tf_list"]
                    network_stats.loc[network_stats.index.isin(marked_genes), "marked_as_tf"] = 1

        # Check the experiments
        marked_genes = set(network_stats.loc[network_stats["marked_as_tf"] == 1].index)
        actual_tf = set(network_stats.loc[network_stats["TF"] == 1].index)
        if len(marked_genes & actual_tf) > 0:
            print(f"### Failed at choosing ctrl TF {exp.name}")

        network_stats.to_csv(f"{exp.exps_path}/Stats/networkStats_{exp.name}.tsv", sep="\t")

In [72]:
# Export the TF genes with the network metrics
if 1:
    for name, ctrl_exp in ctrl_exps.items():
        all_exps = {}
        print(f"### Ctrl exp {name}")
        net_stats_exp_set(ctrl_exp["exps"])

    print(f"### Network stats for experiment")
    net_stats_exp_set(t_exps)

### Ctrl exp 1
tum_ctrl_standard_7TF
### Failed at choosing ctrl TF tum_ctrl_standard_7TF
tum_ctrl_standard_9TF
### Failed at choosing ctrl TF tum_ctrl_standard_9TF
tum_ctrl_standard_11TF
### Failed at choosing ctrl TF tum_ctrl_standard_11TF
tum_ctrl_standard_3TF
### Failed at choosing ctrl TF tum_ctrl_standard_3TF
tum_ctrl_standard_5TF
### Failed at choosing ctrl TF tum_ctrl_standard_5TF
tum_ctrl_standard_10TF
### Failed at choosing ctrl TF tum_ctrl_standard_10TF
tum_ctrl_standard_8TF
### Failed at choosing ctrl TF tum_ctrl_standard_8TF
tum_ctrl_standard_12TF
### Failed at choosing ctrl TF tum_ctrl_standard_12TF
tum_ctrl_standard_6TF
### Failed at choosing ctrl TF tum_ctrl_standard_6TF
tum_ctrl_standard_4TF
### Failed at choosing ctrl TF tum_ctrl_standard_4TF
### Ctrl exp 2
tum_ctrl_standard_9TF
### Failed at choosing ctrl TF tum_ctrl_standard_9TF
tum_ctrl_standard_11TF
### Failed at choosing ctrl TF tum_ctrl_standard_11TF
tum_ctrl_standard_7TF
### Failed at choosing ctrl TF tum_ctrl_

### Pparg/Rarg stats

In [None]:
import pickle as pickle

# Adding the marked genes as TF. This really should haven't been in the other for loop
genes = ["RARG", "PPARG", "ELF3", "AHR"]
comb_df = pd.DataFrame()
for name, ctrl_exp in ctrl_exps.items():
    for exp in ctrl_exp["exps"].values():
        network_stats = pd.read_csv(f"{exp.exps_path}/Stats/networkStats_{exp.name}.tsv", index_col="gene", sep="\t")

        dmy_df = network_stats.loc[network_stats.index.isin(genes)]
        dmy_df["num_tf"] = int(exp.name.split("_")[-1].split("TF")[0])
        dmy_df["ctrl"] = int(name)
        dmy_df["exp"] = exp.name.replace("standard_int", "std")

        comb_df = pd.concat([comb_df, dmy_df], axis=0)

In [None]:
px.box(comb_df.reset_index(), x="gene", y="degree_w", color="num_tf")

# Clustering analysis

In [100]:
gt_exps = {}
for key in [3, 6, 10]:
    dmy_exp = t_exps[key]
    sort_col = "ModCon_{}_gt".format(dmy_exp.type)
    gt_modCon = dmy_exp.get_ModCon()
    dmy_exp.mevsMut, _ = dmy_exp.get_mevs(tpms=all_tum_tpms, modCon=gt_modCon, sort_col=sort_col, num_genes=100, verbose=False)

    cs_exp, fig_std, metrics = gh.run_clusters(dmy_exp, label="exp_tf{}".format(key), show_figs=True)

    gt_exps[key] = {"exp": dmy_exp, "cs": cs_exp}

comb_cs = pd.concat([gt_exps[3]["cs"], gt_exps[6]["cs"], gt_exps[10]["cs"], vu_output], axis=1).dropna()

# for single experiment
if False:
    tf = 6
    gt_exp = h_exps["6"]
    sort_col = "ModCon_{}_gt".format(gt_exp.type)
    gt_modCon = gt_exp.get_ModCon()
    gt_exp.mevsMut, _ = gt_exp.get_mevs(tpms=all_tum_tpms, modCon=gt_modCon, sort_col=sort_col, num_genes=100, verbose=False)

Variation per principal component [0.40436708 0.21250362] and the sum 61.69%
Variation per principal component [0.41147003 0.21441147] and the sum 62.59%
Variation per principal component [0.39984126 0.20689605] and the sum 60.67%


In [101]:
cluster_model, cs = "RawKMeans", 6
col_exp_3 = "{}_CS_{}_{}_tf{}".format(cluster_model, cs, "exp", 3)
col_exp_6 = "{}_CS_{}_{}_tf{}".format(cluster_model, cs, "exp", 6)
col_exp_10 = "{}_CS_{}_{}_tf{}".format(cluster_model, cs, "exp", 10)


cols = ["TCGA408_classifier", "KMeans_labels_6"] + [col_exp_3, col_exp_6, col_exp_10] + ["consensus"]
rename_cols = {
    cols[0]: "TCGA",
    cols[1]: "CA + IFNg",
    cols[2]: "SBM_3TF",
    cols[3]: "SBM_6TF",
    cols[4]: "SBM_10TF",
    cols[5]: "Consensus",
}

meta, fig = sky.main(df=comb_cs.rename(columns=rename_cols), reorder_cols=list(rename_cols.values()), title="", retMeta=True)

fig.update_layout(
    title="",
    font=dict(size=16),
)
fig.show()
save_fig(name="Sankey_comp_SBM_5K", fig=fig, base_path=figures_path, width=1200, height=600)

## Survival

In [102]:
def prep_survival(df, cs_num=6, tf=10, cs_model="RawKMeans", label="SBM"):
    model = "{}_CS_{}_exp_tf{}".format(cs_model, cs_num, tf)

    colors_net = px.colors.qualitative.G10
    for idx, val in enumerate(df[model].unique()):
        color_map[val] = colors_net[idx]

    df[model] = df[model].astype(str)
    fig = survival_plot(df.drop(columns=["days_to_last_follow_up", "days_to_death"]), vu_output, classifier=model, color_map=color_map)
    fig = fig.update_layout(title="{}. Survival analysis for {}".format(label, model))

    return fig


tcga_metadata = pd.read_csv(f"{data_base}/tumour/TCGA_metadata.tsv", sep="\t", index_col="Sample")

In [103]:
fig = prep_survival(comb_cs, cs_num=6, tf=6, cs_model="RawKMeans", label="SBM")
# save_fig(name="Survival_plot_reward", fig=fig, base_path=figures_path, width=1400, height=600)

fig.update_layout(
    legend=dict(
        orientation="h",
        title="Network subtype",
        yanchor="bottom",
        y=0.1,
        xanchor="center",
        x=0.5,
        bgcolor="rgba(0,0,0,0)",
        font=dict(size=16, color="#003366"),
    ),
    title="",
    xaxis=dict(tickfont=dict(size=16)),
    yaxis=dict(tickfont=dict(size=16)),
    font=dict(size=16),
)
fig.show()
save_fig(name="Survival_SBM_5K_6TF", fig=fig, base_path=figures_path, width=1000, height=600)

In [104]:
cluster_model = "RawKMeans_CS_6_exp_tf6"

colors_ref = px.colors.qualitative.Prism
color_map = {
    "LumP": colors_ref[0],
    "Lum Inf/Ns": colors_ref[1],
    "High IFNG": colors_ref[2],
    "Low IFNG": colors_ref[3],
    "Med IFNG": colors_ref[4],
    "Ne": colors_ref[5],
}

select_labels_1 = [1.0, 0.0, 5.0]
select_labels_2 = ["High IFNG", "Med IFNG", "Low IFNG"]

select_labels_1 = None
select_labels_2 = None

colors_net = px.colors.qualitative.Plotly
comb_cs[cluster_model] = comb_cs[cluster_model].astype(str)
for idx, val in enumerate(comb_cs[cluster_model].unique()):
    color_map[val] = colors_net[idx]

fig = survival_comp(
    comb_cs.drop(columns=["days_to_last_follow_up", "days_to_death"]),
    vu_output,
    classifier_1=cluster_model,
    classifier_2="KMeans_labels_6",
    selected_labels_1=select_labels_1,
    selected_labels_2=select_labels_2,
    color_map=color_map,
)
fig = fig.update_layout(title="Survival analysis {}".format("VU + in-situ"))
fig.update_layout(height=700)
# save_fig(name="Survival_plot_reward", fig=fig, base_path=figures_path, width=1400, height=600)

### Significance

In [105]:
from lifelines.statistics import multivariate_logrank_test

In [106]:
def survival_sig(df, model):
    df = df.reset_index().rename(columns={"index": "Sample"}).copy(deep=True)
    classifier = model

    dmy = df[["days_to_last_follow_up", "days_to_death", classifier]].replace("--", 0).astype(int)
    dmy["last_contact"] = dmy[["days_to_last_follow_up", "days_to_death"]].max(axis=1).div(30)

    labels = list(df[model].unique())
    dmy = dmy[dmy[classifier].isin(labels)]

    results = multivariate_logrank_test(dmy["last_contact"], dmy[classifier], dmy["days_to_death"])
    display(results.print_summary())
    print("{0:.10f}".format(results.p_value))

In [107]:
comb_cs[cluster_model] = comb_cs[cluster_model].astype(float).astype(int)
survival_sig(comb_cs, model=cluster_model)

0,1
t_0,-1
null_distribution,chi squared
degrees_of_freedom,5
test_name,multivariate_logrank_test

Unnamed: 0,test_statistic,p,-log2(p)
0,9.74,0.08,3.59


None

0.0829495254


# Visualise SBM

In [None]:
def callback(self, g, keyval, picked, pos, vprops, eprops):
    # label
    if type(picked) != gt.VertexPropertyMap:
        print("{}. Comm {}. Mut {}. Idx {}".format(g.vp.gene[picked], g.vp.max_b[picked], g.vp.mut_count[picked], picked))

In [None]:
gt_exp = t_exps[6]
gt_exp.layout()
gt_exp.add_vp(mut_df=tcga_mutations_df)
pos = gt_exp.pos
gt_g = gt_exp.gt_g
pv = gt_exp.states[0]["pv"]
pmode = gt_exp.states[0]["pmode"]

gt_g = gt_exp.gt_g
state = gt.BlockState(gt_g, recs=[gt_g.ep.weight], rec_types=["real-exponential"])

In [None]:
# interactive session
if False:
    state.draw(
        pos=pos,
        vertex_shape="pie",
        vertex_pie_fractions=pv,
        inline=False,
        nodeFirst=True,
        # aspect=0.4,
        ink_scale=0.4,
        vertex_text=gt_g.vp.gene,
        display_props=gt_g.vp.gene,
        output_size=(2000, 2000),  # debugging
        #### Nodes properties
        # vertex_size=gt.prop_to_size(gt_g.vp.mut_count, 1, 20, power=1),
        vertex_font_size=gt.prop_to_size(gt_g.vp.mut_count, 10, 14, power=1.0),  # 5, 30 for notebook  viz
        ##### edges properties
        # edge_color=gt.prop_to_size(gt_g.ep.weight, power=1, log=True),
        edge_pen_width=gt.prop_to_size(gt_g.ep.weight, 0, 3, power=0.1),
        # edge_gradient=[],
        # vorder=gt_g.vp.mut_count,
        key_press_callback=callback,
        # text_out_width=0.1,
    )

In [None]:
# Config to save it as PDF
state.draw(
    pos=pos,
    vertex_shape="pie",
    vertex_pie_fractions=pv,
    inline=False,
    vertex_text=gt_g.vp.gene,
    display_props=gt_g.vp.gene,
    output_size=(8000, 8000),  # good to visualised in Notebook
    #### Nodes properties
    vertex_size=gt.prop_to_size(gt_g.vp.mut_count, 1, 20, power=1),
    vertex_font_size=gt.prop_to_size(gt_g.vp.mut_count, 14, 24, power=1.0), 
    ##### edges properties
    edge_pen_width=gt.prop_to_size(gt_g.ep.weight, 0, 1, power=0.1),
    key_press_callback=callback,
    output=figures_path + "sbm_h_tf6_5K.pdf",
)


Unknown parameter: display_props


Unknown parameter: key_press_callback



<VertexPropertyMap object with value type 'vector<double>', for Graph 0x1d6af1490, at 0x1d7148210>

## Export SBM to Gephi

In [None]:
results_df = gt_exp.get_stable_genes(state_idx=0, prob_th=0.75)
results_df["stable_gene_num"] = 0
results_df.loc[results_df["stable_gene"] == "wobble", "stable_gene_num"] = 1
results_df["stable_gene"].value_counts()

gt_exp.export_to_gephi(save=False, sbm_df=results_df, state_idx=0)

AttributeError: 'PGCNAOutput' object has no attribute 'graph_type'

## Partition Prob

In [None]:
gt_exp.plot_partition_prob()