In [16]:
import numpy as np
import networkx as nx
import random
import torch
import matplotlib.pyplot as plt
from collections import defaultdict
from sklearn import metrics
import importlib

import load_data as ld
import func
import evaluation
import algorithms.ETNA as etna
import algorithms.helper as helper

In [2]:

# Organism codes of the two PPI networks that will be aligned.
org0, org1 = 'sce', 'spo'

# Load the networks one after the other (org0 first) with identical
# settings; k_core=0 and lcc=False are passed straight to ld.load_ppi.
g0, g1 = (ld.load_ppi(organism, k_core=0, lcc=False)
          for organism in (org0, org1))

load the ppi network of sce
read as directed: 7073 nodes,          116326 edges
remove selfloop edges: 7072 nodes, 114578 edges
convert to undirected: 7072 nodes, 114578 edges
keep the largest cc: 7072 nodes, 114578 edges
return the 0-core: 7072 nodes,                114578 edges
load the ppi network of spo
read as directed: 3573 nodes,          13122 edges
remove selfloop edges: 3508 nodes, 12567 edges
convert to undirected: 3508 nodes, 12567 edges
keep the largest cc: 3508 nodes, 12567 edges
return the 0-core: 3508 nodes,                12567 edges


In [5]:
# Label -> index maps for both networks. Wrapping them in a defaultdict
# makes a lookup of an unknown label yield -1 instead of raising KeyError
# (note: such a lookup also inserts the label into the map).
g0_node2index = defaultdict(lambda: -1, func.node2index(g0))
g1_node2index = defaultdict(lambda: -1, func.node2index(g1))

# Reverse (index -> label) maps, as returned by func.index2node.
g0_index2node, g1_index2node = func.index2node(g0), func.index2node(g1)

In [7]:
# Ontology gene pairs between the two organisms, passed through
# ld.filter_anchor together with both label -> index maps.
ontology_file = org0 + '_' + org1 + '_ontology_pairs_expert.txt'
ontology = ld.filter_anchor(ld.load_go_pairs(org0, org1, ontology_file),
                            g0_node2index, g1_node2index)
print('ontology', len(ontology))

# Ortholog (anchor) pairs, filtered the same way.
ortholog = ld.filter_anchor(ld.load_anchor(org0, org1),
                            g0_node2index, g1_node2index)
print('ortholog', len(ortholog))

# Both indicator matrices have one row per g0 node and one column per g1 node.
pair_matrix_shape = (len(g0.nodes()), len(g1.nodes()))

# NOTE(review): the maps default to -1 for unknown labels, which would
# silently address the last row/column below. This presumably cannot happen
# after ld.filter_anchor -- TODO confirm.

# Orthologs: every record unpacks into three fields, of which only the two
# gene labels are used here.
ortholog_set = set()
ortholog_matrix = np.zeros(pair_matrix_shape, dtype=int)
for gene0, gene1, _unused in ortholog:
    index_pair = (g0_node2index[gene0], g1_node2index[gene1])
    ortholog_set.add(index_pair)
    ortholog_matrix[index_pair] = 1

# Ontology pairs: every record unpacks into exactly two gene labels.
ontology_set = set()
ontology_matrix = np.zeros(pair_matrix_shape, dtype=int)
for gene0, gene1 in ontology:
    index_pair = (g0_node2index[gene0], g1_node2index[gene1])
    ontology_set.add(index_pair)
    ontology_matrix[index_pair] = 1

(244188, 2)
ontology 222217
ortholog 2189


In [9]:
# Keep only the genes that occur in at least one ontology pair.
org0_annotations = ontology_matrix.sum(axis=1)  # row sums: one count per g0 node
org1_annotations = ontology_matrix.sum(axis=0)  # column sums: one count per g1 node
org0_ontology_indexes = np.flatnonzero(org0_annotations > 0).tolist()
org1_ontology_indexes = np.flatnonzero(org1_annotations > 0).tolist()

# Sub-matrix of ontology_matrix restricted to the selected rows and columns.
test_matrix = ontology_matrix[np.ix_(org0_ontology_indexes, org1_ontology_indexes)]

In [13]:
# Second positional argument of helper.direct_compute_deepwalk_matrix.
# NOTE(review): presumably the random-walk window size -- TODO confirm.
DEEPWALK_PARAM = 10


def _precompute_matrices(graph, deepwalk_param=DEEPWALK_PARAM):
    """Compute the dense per-network matrices used by the cells below.

    Args:
        graph: networkx graph of one organism.
        deepwalk_param: value forwarded as the second argument of
            ``helper.direct_compute_deepwalk_matrix``.

    Returns:
        Tuple ``(adj, lap, dw, norms, normalized)`` where ``adj`` and ``lap``
        are the dense adjacency and Laplacian matrices, ``dw`` is the dense
        result of ``helper.direct_compute_deepwalk_matrix``, ``norms`` holds
        the row-wise L2 norms of ``dw`` (zeros replaced by 1) and
        ``normalized`` is ``dw`` divided row-wise by ``norms``.
    """
    adj_sparse = nx.adjacency_matrix(graph)
    lap = nx.laplacian_matrix(graph).toarray()
    # The helper is called on the sparse adjacency; it is densified afterwards.
    dw = helper.direct_compute_deepwalk_matrix(adj_sparse, deepwalk_param).toarray()

    norms = np.linalg.norm(dw, axis=1, keepdims=True)
    norms[norms == 0] = 1  # avoid division by zero for all-zero rows
    normalized = dw / norms
    return adj_sparse.toarray(), lap, dw, norms, normalized


g0_adj, g0_lap, g0_dw, g0_norms, g0_normalized = _precompute_matrices(g0)
g1_adj, g1_lap, g1_dw, g1_norms, g1_normalized = _precompute_matrices(g1)


In [28]:
# Re-import algorithms.ETNA so that edits made to the module on disk take
# effect without restarting the kernel. The returned module object is the
# cell's displayed output.
# NOTE(review): development aid -- not needed on a fresh Restart & Run All.
importlib.reload(etna)

<module 'algorithms.ETNA' from '/Users/llc/proj-net-embed/src/algorithms/ETNA.py'>

In [29]:
device = 'cpu'

# Seed every RNG imported by this notebook before anything stochastic runs
# (weight initialisation here, training in the following cells), so that a
# Restart & Run All yields the same scores. Without this the reported
# AUROC/AUPRC change from run to run.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

#initialize model
# One ETNAModel per network, sized by that network's node count; both use
# the same hidden layer sizes and no RBM initialisation.
model0 = etna.ETNAModel(len(g0.nodes), hidden_layers=[1024, 128],
                        device=device, rbm_init=False,
                        rbms=None).to(device)
model1 = etna.ETNAModel(len(g1.nodes), hidden_layers=[1024, 128],
                        device=device, rbm_init=False,
                        rbms=None).to(device)

# Each trainer receives its graph, its model and the matrices computed
# earlier for that graph (precal=True is passed together with them).
trainer0 = etna.ETNATrainer(g0, model0, device=device, precal=True,
                            matrices=(g0_adj, g0_lap, g0_dw, g0_normalized))
trainer1 = etna.ETNATrainer(g1, model1, device=device, precal=True,
                            matrices=(g1_adj, g1_lap, g1_dw, g1_normalized))

In [31]:
# A single Adam optimiser over the encoder parameters of both models.
align_parameters = model0.encoders_parameters() + model1.encoders_parameters()
optimizer_align = torch.optim.Adam(align_parameters, lr=0.001)

# StepLR multiplies the learning rate by gamma every step_size steps;
# with gamma=1 the learning rate stays at its initial value.
scheduler_align = torch.optim.lr_scheduler.StepLR(
    optimizer_align, step_size=1, gamma=1
)

In [32]:
#model training
N_ROUNDS = 5        # outer rounds alternating per-network fitting and cross training
FIT_PASSES = 1      # per-network fit passes in each round
ALIGN_PASSES = 1    # cross-training passes in each round
BATCH_SIZE = 128    # batch size handed to trainer.fit

for i in range(N_ROUNDS):
    # Fit each network's model on its own (trainer0 first, then trainer1).
    for j in range(FIT_PASSES):
        for trainer in (trainer0, trainer1):
            trainer.fit(batch_size=BATCH_SIZE, epochs=1, verbose=0)

    # Cross-network step driven by the ortholog index pairs.
    for k in range(ALIGN_PASSES):
        etna.cross_training(
            trainer0, trainer1, ortholog_set,
            optimizer_align, scheduler_align,
            device, psi=1,
        )


In [36]:
# Embeddings produced by each trainer, then the cosine similarity between
# every row of emb0 and every row of emb1 as the cross-network score matrix.
emb0, emb1 = trainer0.get_embeddings(), trainer1.get_embeddings()
S = metrics.pairwise.cosine_similarity(X=emb0, Y=emb1)

In [39]:
# Restrict the score matrix to the rows/columns selected for test_matrix,
# so that scores and labels line up entry by entry.
S_test = S[np.ix_(org0_ontology_indexes, org1_ontology_indexes)]
# All-ones array of the same shape, passed as the third argument.
eval_mask = np.ones(test_matrix.shape)

auroc, auprc = evaluation.evaluate_all(S_test, test_matrix, eval_mask)
print(auroc, auprc)

0.7103785558842275 0.1590980092245576
