In [1]:
# imports
import numpy as np
import pandas as pd
import networkx as nx
import kgfe

In [2]:
from kgfe import gene_names

In [3]:
import igraph as ig

## Load SPOKE

In [4]:
spoke_graph = kgfe.spoke_loader.load_spoke_igraph('../../graph_utils/spoke_2021.jsonl.gz', directed=True,
                                                    verbose=False)

In [90]:
gene_names.get_id('LILRA3')

11026

In [101]:
spoke_graph_ud = spoke_graph.copy()
spoke_graph_ud.to_undirected(combine_edges='first')
spoke_graph_ud.simplify(combine_edges='first')
spoke_graph_ud = kgfe.graph_info.largest_component(spoke_graph_ud)

In [102]:
genes_to_ids = kgfe.graph_info.spoke_identifiers_to_ids(spoke_graph_ud, 'Gene')

In [103]:
disease_to_ids = kgfe.graph_info.spoke_identifiers_to_ids(spoke_graph_ud, 'Disease')

In [104]:
disease_to_ids['DOID:9119']

'2166630'

## List of Venetoclax genes

In [105]:
genes_small = 'BCL2, NFKB2, GATA2, NPC2'.split(', ')
gene_ncbi_ids = gene_names.get_ids(genes_small)

In [106]:
genes_venetoclax = 'FGD4 MIR4519 NPC2 GATA2 CASC10 POU6F1 BCL2 BECN1 C1QA SF3B2 IGF2R DNAJC1 ECHDC3 MIR940 GDF11 RBPJ MTSS1 FNTA'
genes_venetoclax += 'CTCF NFKB2 FGD4 RPS10 LILRB2 COX6A1 C1QA FOXO1 LILRA3 RELB GOLGA8J LEF1'

genes_venetoclax = genes_venetoclax.split()
genes_venetoclax_ncbi_ids = []
for g in genes_venetoclax:
    try:
        genes_venetoclax_ncbi_ids.append(gene_names.get_id(g))
    except:
        pass

In [107]:
genes_venetoclax_spoke_ids = [genes_to_ids[g] for g in genes_venetoclax_ncbi_ids if g in genes_to_ids] + ['2166630']

In [114]:
print('venetoclax genes:', len(genes_venetoclax), 'spoke-venetoclax genes:', len(genes_venetoclax_spoke_ids))

venetoclax genes: 29 spoke-venetoclax genes: 26


In [108]:
gene_ids = [genes_to_ids[g] for g in gene_ncbi_ids] + ['2166630']

## Get Steiner Tree for AML genes on SPOKE-2021

In [109]:
gene_st, gene_subgraph = kgfe.explanations.steiner_tree_subgraph(spoke_graph_ud,
                                                                    gene_ids,
                                                                         method='takahashi')

In [110]:
import ipycytoscape
graph_style = [{
                        'selector': 'node[feature_name]',
                             'style': {
                                  'font-family': 'helvetica',
                                  'font-size': '20px',
                                 'label': 'data(feature_name)',
                        }},
                        {'selector': 'node[category="Protein"]',
                         'style': {
                             'background-color': 'blue',
                         }},
                        {'selector': 'node[in_query=1]',
                         'style': {
                             'border-width': '5px',
                             'border-color': 'red',
                         }},
                        {'selector': 'edge[type]',
                         'style': {
                             'label': 'data(type)',
                         }},
                        {"selector": "edge.directed",
                         "style": {
                            "curve-style": "bezier",
                            "target-arrow-shape": "triangle",
                    }},

                    ]

In [111]:
undirected = ipycytoscape.CytoscapeWidget()
gene_subgraph_nx = gene_subgraph.to_networkx()
node_dict = {i: n['name'] for i, n in gene_subgraph_nx.nodes.items()}
gene_subgraph_nx = nx.relabel_nodes(gene_subgraph_nx, node_dict)
undirected.graph.add_graph_from_networkx(gene_subgraph_nx)
undirected.set_tooltip_source('feature_name')
undirected.set_layout(title='high_proteins', nodeSpacing=80, edgeLengthVal=50, )
undirected.set_style(graph_style)

In [112]:
display(undirected)

CytoscapeWidget(cytoscape_layout={'name': 'cola', 'title': 'high_proteins', 'nodeSpacing': 80, 'edgeLengthVal'…

### Steiner tree for "full" venetoclax genes on SPOKE-2021

In [113]:
gene_st, gene_subgraph = kgfe.explanations.steiner_tree_subgraph(spoke_graph_ud,
                                                                    genes_venetoclax_spoke_ids,
                                                                         method='takahashi')

In [115]:
undirected = ipycytoscape.CytoscapeWidget()
gene_subgraph_nx = gene_subgraph.to_networkx()
node_dict = {i: n['name'] for i, n in gene_subgraph_nx.nodes.items()}
gene_subgraph_nx = nx.relabel_nodes(gene_subgraph_nx, node_dict)
undirected.graph.add_graph_from_networkx(gene_subgraph_nx)
undirected.set_tooltip_source('feature_name')
undirected.set_layout(title='high_proteins', nodeSpacing=80, edgeLengthVal=50, )
undirected.set_style(graph_style)

In [116]:
display(undirected)

CytoscapeWidget(cytoscape_layout={'name': 'cola', 'title': 'high_proteins', 'nodeSpacing': 80, 'edgeLengthVal'…

## Merging graphs - do stuff on combined SIGNOR, reactome, kegg data

In [17]:
kgfe.graph_info.get_available_graphs()
# consider SIGNOR, kegg, reactome

['reactome_ppis.csv.gz',
 'uniprot_genes.csv',
 'spoke_2021.jsonl.gz',
 'kegg_pathway_data.csv',
 'hsa04930.csv',
 'reactome_genes_chems.csv.gz',
 'SIGNOR_formated.tsv',
 'kegg_interaction_data.csv',
 'KEGG_signaling.tsv',
 'msigdb']

In [18]:
kegg_signaling = kgfe.graph_info.load_graph('KEGG_signaling.tsv')

In [72]:
kegg_signaling.predicate = 'kegg_' + kegg_signaling.predicate

In [19]:
signor_signaling = kgfe.graph_info.load_graph('SIGNOR_formated.tsv')

In [70]:
signor_signaling.predicate = 'signor_' + signor_signaling.predicate

In [20]:
reactome_genes_chems = kgfe.graph_info.load_graph('reactome_genes_chems.csv.gz')

  df = pd.read_csv(filename)


In [71]:
reactome_genes_chems.predicate = 'reactome_' + reactome_genes_chems.predicate

In [73]:
merged_graph = pd.concat([kegg_signaling, signor_signaling, reactome_genes_chems], axis=0)

In [74]:
biggim_graph = kgfe.graph_info.df_to_graph(merged_graph)

In [75]:
len(biggim_graph.vs)

24839

In [117]:
genes_to_ids = kgfe.graph_info.get_names_to_ids(biggim_graph, category='Gene')

## Get Steiner Tree for AML genes on combined graph

In [118]:
gene_ids = [genes_to_ids[g] for g in genes_small]

In [119]:
genes_venetoclax_ids = [genes_to_ids[g] for g in genes_venetoclax if g in genes_to_ids]

In [120]:
gene_st, gene_subgraph = kgfe.explanations.steiner_tree_subgraph(biggim_graph,
                                                                    gene_ids,
                                                                         method='takahashi')

In [121]:
graph_style = [{
                        'selector': 'node[feature_name]',
                             'style': {
                                  'font-family': 'helvetica',
                                  'font-size': '20px',
                                 'label': 'data(feature_name)',
                        }},
                        {'selector': 'node[category="Protein"]',
                         'style': {
                             'background-color': 'blue',
                         }},
                        {'selector': 'node[in_query=1]',
                         'style': {
                             'border-width': '5px',
                             'border-color': 'red',
                         }},
                        {'selector': 'edge[predicate]',
                         'style': {
                             'label': 'data(predicate)',
                         }},
                        {"selector": "edge.directed",
                         "style": {
                            "curve-style": "bezier",
                            "target-arrow-shape": "triangle",
                    }},

                    ]

In [122]:
undirected = ipycytoscape.CytoscapeWidget()
gene_subgraph_nx = gene_subgraph.to_networkx()
node_dict = {i: n['name'] for i, n in gene_subgraph_nx.nodes.items()}
gene_subgraph_nx = nx.relabel_nodes(gene_subgraph_nx, node_dict)
undirected.graph.add_graph_from_networkx(gene_subgraph_nx)
for node in undirected.graph.nodes:
    if 'name' in node.data:
        node.data['id'] = node.data['name']
undirected.set_tooltip_source('feature_name')
undirected.set_layout(title='high_proteins', nodeSpacing=80, edgeLengthVal=50, )
undirected.set_style(graph_style)

In [123]:
display(undirected)

CytoscapeWidget(cytoscape_layout={'name': 'cola', 'title': 'high_proteins', 'nodeSpacing': 80, 'edgeLengthVal'…

In [124]:
gene_st, gene_subgraph = kgfe.explanations.steiner_tree_subgraph(biggim_graph,
                                                                    genes_venetoclax_ids,
                                                                         method='takahashi')

### Steiner tree for "full" venetoclax genes on merged graph

In [125]:
undirected = ipycytoscape.CytoscapeWidget()
gene_subgraph_nx = gene_subgraph.to_networkx()
node_dict = {i: n['name'] for i, n in gene_subgraph_nx.nodes.items()}
gene_subgraph_nx = nx.relabel_nodes(gene_subgraph_nx, node_dict)
undirected.graph.add_graph_from_networkx(gene_subgraph_nx)
for node in undirected.graph.nodes:
    if 'name' in node.data:
        node.data['id'] = node.data['name']
undirected.set_tooltip_source('feature_name')
undirected.set_layout(title='high_proteins', nodeSpacing=80, edgeLengthVal=50, )
undirected.set_style(graph_style)

In [126]:
display(undirected)

CytoscapeWidget(cytoscape_layout={'name': 'cola', 'title': 'high_proteins', 'nodeSpacing': 80, 'edgeLengthVal'…