In [1]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from __future__ import division
from __future__ import print_function
from operator import itemgetter
from itertools import combinations
import time
import os

#import pixiedust

import tensorflow as tf
import numpy as np
import networkx as nx
import scipy.sparse as sp
from sklearn import metrics

from decagon.deep.optimizer import DecagonOptimizer
from decagon.deep.model import DecagonModel
from decagon.deep.minibatch import EdgeMinibatchIterator
from decagon.utility import rank_metrics, preprocessing

  from ._conv import register_converters as _register_converters


In [2]:
# Train on CPU (hide GPU) due to memory constraints
os.environ['CUDA_VISIBLE_DEVICES'] = ""

# Train on GPU
#os.environ["CUDA_DEVICE_ORDER"] = 'PCI_BUS_ID'
#os.environ["CUDA_VISIBLE_DEVICES"] = '0'
#config = tf.ConfigProto()
#config.gpu_options.allow_growth = True

np.random.seed(0)

###########################################################
#
# Functions
#
###########################################################


def get_accuracy_scores(edges_pos, edges_neg, edge_type):
    feed_dict.update({placeholders['dropout']: 0})
    feed_dict.update({placeholders['batch_edge_type_idx']: minibatch.edge_type2idx[edge_type]})
    feed_dict.update({placeholders['batch_row_edge_type']: edge_type[0]})
    feed_dict.update({placeholders['batch_col_edge_type']: edge_type[1]})
    rec = sess.run(opt.predictions, feed_dict=feed_dict)

    def sigmoid(x):
        return 1. / (1 + np.exp(-x))

    # Predict on test set of edges
    preds = []
    actual = []
    predicted = []
    edge_ind = 0
    for u, v in edges_pos[edge_type[:2]][edge_type[2]]:
        score = sigmoid(rec[u, v])
        preds.append(score)
        assert adj_mats_orig[edge_type[:2]][edge_type[2]][u,v] == 1, 'Problem 1'

        actual.append(edge_ind)
        predicted.append((score, edge_ind))
        edge_ind += 1

    preds_neg = []
    for u, v in edges_neg[edge_type[:2]][edge_type[2]]:
        score = sigmoid(rec[u, v])
        preds_neg.append(score)
        assert adj_mats_orig[edge_type[:2]][edge_type[2]][u,v] == 0, 'Problem 0'

        predicted.append((score, edge_ind))
        edge_ind += 1

    preds_all = np.hstack([preds, preds_neg])
    preds_all = np.nan_to_num(preds_all)
    labels_all = np.hstack([np.ones(len(preds)), np.zeros(len(preds_neg))])
    predicted = list(zip(*sorted(predicted, reverse=True, key=itemgetter(0))))[1]

    roc_sc = metrics.roc_auc_score(labels_all, preds_all)
    aupr_sc = metrics.average_precision_score(labels_all, preds_all)
    apk_sc = rank_metrics.apk(actual, predicted, k=50)

    return roc_sc, aupr_sc, apk_sc


def construct_placeholders(edge_types):
    placeholders = {
        'batch': tf.placeholder(tf.int32, name='batch'),
        'batch_edge_type_idx': tf.placeholder(tf.int32, shape=(), name='batch_edge_type_idx'),
        'batch_row_edge_type': tf.placeholder(tf.int32, shape=(), name='batch_row_edge_type'),
        'batch_col_edge_type': tf.placeholder(tf.int32, shape=(), name='batch_col_edge_type'),
        'degrees': tf.placeholder(tf.int32),
        'dropout': tf.placeholder_with_default(0., shape=()),
    }
    placeholders.update({
        'adj_mats_%d,%d,%d' % (i, j, k): tf.sparse_placeholder(tf.float32)
        for i, j in edge_types for k in range(edge_types[i,j])})
    placeholders.update({
        'feat_%d' % i: tf.sparse_placeholder(tf.float32)
        for i, _ in edge_types})
    return placeholders

In [3]:
###########################################################
#
# Load and preprocess data (This is a dummy toy example!)
#
###########################################################

####
# The following code uses artificially generated and very small networks.
# Expect less than excellent performance as these random networks do not have any interesting structure.
# The purpose of main.py is to show how to use the code!
#
# All preprocessed datasets used in the drug combination study are at: http://snap.stanford.edu/decagon:
# (1) Download datasets from http://snap.stanford.edu/decagon to your local machine.
# (2) Replace dummy toy datasets used here with the actual datasets you just downloaded.
# (3) Train & test the model.
####


In [4]:
#bio-decagon-ppi:Protein-protein interaction network
#bio-decagon-targets: Drug-target protein associations
#bio-decagon-targets-all: Drug-target protein associations culled from several curated databases
#bio-decagon-combo:Polypharmacy side effects in the form of (drug A, side effect type, drug B) triples
#bio-decagon-effectcategories: Side effect categories
#bio-decagon-mono:Side effects of individual drugs in the form of (drug A, side effect type) tuples

<h1>Information about the datasets from the paper:</h1>
<h4>The protein-prtoein network:</h4>
number of proteins = 19,085, number of physical interactions = 719,402
<h4>The drug-prtoein network:</h4>
number of proteins = 8,934, number of drugs = 519,022, number of interactions = 8,083,600
<h4>The drug-drug network(individuale):</h4>
number of  drugs = 1,556, number of side effects = 5,868, number of drug-side effect association = 286,399
<h4>The drug-drugn network(combination):</h4>
number of  drug combinations = 63,473, number of side effect types = 1,318, number of drug combination-side effect association = 4,651,131
<h2>Final Network:</h2>
<h3>Number of protiens = 19,085 (paper) ....... Number of protiens = 19,081(ppi data) </h3>
<h3>Number of drugs = 645 (paper).......... Number of drugs = 645 (polypharmacy side effect data (combo))</h3>
<h3>Number of protien-protien edges= 715,612(paper) ....... Number of protien-protien edges= 715,612 (ppi data)</h3>
<h3>Number of drug-drug edges= 4,651,131 (paper) ......... Number of drug-drug edges= 4,649,441 (polypharmacy side effect data (combo)) </h3>
<h3>Number of drug-protien edges= 18,596 (paper) ........ Number of drug-protien edges= 18,690 (Drug-target protein (targets))</h3>

In [5]:
#protein_protein_data = pd.read_csv("/Users/ravanv/Desktop/Decagon/data/bio-decagon-ppi.csv", sep=',',header = 0)
#drug_target_protein_data = pd.read_csv("/Users/ravanv/Desktop/Decagon/data/bio-decagon-targets.csv", sep=',',header = 0)
#drug_target_protein_all_data = pd.read_csv("/Users/ravanv/Desktop/Decagon/data/bio-decagon-targets-all.csv", sep=',',header = 0)
#polypharmacy_side_effect_data = pd.read_csv("/Users/ravanv/Desktop/Decagon/data/bio-decagon-combo.csv", sep=',',header = 0)
#side_effect_categories_data = pd.read_csv("/Users/ravanv/Desktop/Decagon/data/bio-decagon-effectcategories.csv", sep=',',header = 0)
#side_effect_individuale_data = pd.read_csv("/Users/ravanv/Desktop/Decagon/data/bio-decagon-mono.csv", sep=',',header = 0)


<h2>Reading 3 small datasets</h2>

In [6]:
import socket

if socket.gethostname()=='Peters-MacBook-Pro-2.local':
    dir ='/Users/peterrobinson/Documents/GIT/decagon/small_datasets/'
    protein_protein_data = pd.read_csv(dir +"small-ppi.csv", sep=',',header = 0)
    polypharmacy_side_effect_data = pd.read_csv(dir + "small-combo.csv", sep=',',header = 0)
    drug_target_protein_data = pd.read_csv(dir +"bio-decagon-targets.csv", sep=',',header  = 0)
else:
    protein_protein_data = pd.read_csv("/Users/ravanv/Desktop/Decagon/data/small-ppi.csv", sep=',',header = 0)
    polypharmacy_side_effect_data = pd.read_csv("/Users/ravanv/Desktop/Decagon/data/small-combo.csv", sep=',',header = 0)
    drug_target_protein_data = pd.read_csv("/Users/ravanv/Desktop/Decagon/data/bio-decagon-targets.csv", sep=',',header = 0)

In [7]:
print("size of the protein-protein network:", protein_protein_data.shape)
print("size of the drug-target protein associations:", drug_target_protein_data.shape)
#print("size of the Drug-target protein associations culled from several curated databases :",drug_target_protein_all_data.shape)
print("size of the polypharmacy side effects:", polypharmacy_side_effect_data.shape)
#print("size of the Side effect categories:",side_effect_categories_data.shape)
#print("size of the Side effects of individual drugs:",side_effect_individuale_data.shape)
#protein_protein_data.head()

size of the protein-protein network: (499, 2)
size of the drug-target protein associations: (18690, 2)
size of the polypharmacy side effects: (9999, 4)


In [8]:
val_test_size = 0.1 #changed from 0.05 to 0.1 in the original code
#n_genes = 19081 #from protein-protein network 
#n_drugs = 645 #from drug-drug combo network

<h2> Creating a symmetric adjacency matrix for genes from the protein-protein network </h2>  

In [9]:
df_gene1_gene2 = pd.crosstab(protein_protein_data['Gene 1'], protein_protein_data['Gene 2'])
gene_idx = df_gene1_gene2.columns.union(df_gene1_gene2.index)
df_gene1_gene2 = df_gene1_gene2.reindex(index = gene_idx, columns = gene_idx,fill_value=0)#upper triangle of the adjacency matrix
df_gene2_gene1 = pd.crosstab(protein_protein_data['Gene 2'], protein_protein_data['Gene 1'])
df_gene2_gene1 = df_gene2_gene1.reindex(index = gene_idx, columns = gene_idx, fill_value=0)#lower triangle of the adjacency matrix
gene_adj = df_gene2_gene1.add(df_gene1_gene2, fill_value=0)#creates a symmetric adjacency matrix of Gene 1 and Gene 2 by adding upper triangle and lower triangle
gene_degrees = np.array(gene_adj.sum(axis=0)).squeeze()#get the degrees of genes
print("size of the gene-gene adjacecny matrix:",gene_adj.shape, ", number of genes:",len(gene_degrees))

size of the gene-gene adjacecny matrix: (496, 496) , number of genes: 496


In [10]:
n_genes = len(gene_degrees)

<h3>Making an adjacency matrix for the genes from the adjacency dataframe</h3>

In [11]:
indices_genes = list(range(0,len(gene_idx)))
dict_genes = dict(zip(gene_idx, indices_genes))#creating a dictionary to map genes to indices

In [12]:
gene_adj_mat = np.zeros((n_genes,n_genes)) # adj. matrix of size n_genes * n_genes
for i in range(0,protein_protein_data.shape[0]):#read from the protein-protein file
    gene1 = protein_protein_data.loc[i][0]#read gene1
    gene2 = protein_protein_data.loc[i][1]#read gene2
    #print(gene1,gene2)
    gene1_index = dict_genes.get(gene1)#get the index of gene1 in dictionary
    gene2_index = dict_genes.get(gene2)#get the index of gene2 in dictionary
    #print(gene1_index,gene2_index)
    gene_adj_mat[gene1_index][gene2_index] = 1.0 
    gene_adj_mat[gene2_index][gene1_index] = 1.0

In [13]:
G=nx.from_numpy_matrix(gene_adj_mat)
edges = G.edges(data=True)

In [14]:
def show_graph_with_labels(adjacency_matrix):
    rows, cols = np.where(adjacency_matrix == 1)
    edges = zip(rows.tolist(), cols.tolist())
    gr = nx.Graph()
    gr.add_edges_from(edges)
    nx.draw(gr, node_size=500)
    plt.show()

In [15]:
#show_graph_with_labels(gene_adj_mat)

In [16]:
gene_adj_mat_t = gene_adj_mat.transpose() #transpose matrix of the adj. matrix 

<h2> Creating a list of symmetric adjacency matrices for each side effect from the drug-drug network (combo)</h2>

In [17]:
list_side_effects = []
prev_side_effect = []
for i in range(0,polypharmacy_side_effect_data.shape[0]):#get the list of side effects
    if polypharmacy_side_effect_data.loc[i][2] not in prev_side_effect:
        list_side_effects.append(polypharmacy_side_effect_data.loc[i][2])
        prev_side_effect.append(polypharmacy_side_effect_data.loc[i][2])
# ALTERNATIVELY BLA=set(polypharmacy_side_effect_data['Polypharmacy Side Effect'])

In [18]:
print("number of side effects =", len(list_side_effects) )
#polypharmacy_side_effect_data.head()



number of side effects = 995


In [19]:
#df_drug1_drug2 = pd.crosstab(polypharmacy_side_effect_data['STITCH 1'], polypharmacy_side_effect_data['STITCH 2'])
#drug_idx = df_drug1_drug2.columns.union(df_drug1_drug2.index)#get the list of all drugs
#n_drugs = len(drug_idx)
#print("number of drugs = ", len(drug_idx))

drug_set=set(polypharmacy_side_effect_data['STITCH 1']).union(set(polypharmacy_side_effect_data['STITCH 2']))
n_drugs=len(drug_set)
n_drugs



196

In [20]:
indices_drugs = list(range(0,n_drugs))
dict_drugs = dict(zip(drug_set, indices_drugs))#create a dictionary to map each drug to its index

<h2> Creating adjacency matrices for the drug-protein and protein-drug network.</h2>

In [21]:
#%%pixie_debugger
def create_side_effect_matrices():
    drug_drug_adj_list = []
    for i in range(len(list_side_effects)):
        row = []
        col = []
        values = []
        drug_drug_mat = np.zeros((n_drugs,n_drugs))
        print("i=",i,"/",len(list_side_effects))
        for row_index in range(polypharmacy_side_effect_data.shape[0]):
            if polypharmacy_side_effect_data.loc[row_index][2] == list_side_effects[i]:
               # print(row_index)
                drug_1 = polypharmacy_side_effect_data.loc[row_index][0]
                drug_2 = polypharmacy_side_effect_data.loc[row_index][1] 
                drug_1_index = dict_drugs.get(drug_1)
                drug_2_index = dict_drugs.get(drug_2)
                #print(drug_1_index,drug_2_index)
            #drug_drug_mat[drug_1_index,drug_2_index] =  drug_drug_mat[drug_2_index,drug_1_index] = 1.
            #make sparse
                row.append(drug_1_index)
                col.append(drug_2_index)
                values.append(1.0)
                row.append(drug_2_index)
                col.append(drug_1_index)
                values.append(1.0)
            #print(row,col)
        mysparsematrix=sp.csr_matrix((values,(row,col)),shape=(n_drugs,n_drugs)).toarray()       
        drug_drug_adj_list.append(sp.csr_matrix(mysparsematrix))
        #drug_drug_adj_list.append(sp.csr_matrix(drug_drug_mat))
    drug_degrees_list = [np.array(drug_adj.sum(axis=0)).squeeze() for drug_adj in drug_drug_adj_list]
    return drug_degrees_list,drug_drug_adj_list

In [28]:
import pickle
from pathlib import Path
decagon_file_path = 'decagon_se_matrices.txt'
drug_adjacency_path = 'decagon_drug_adjacency_list'

my_file = Path(decagon_file_path)
my_file2 = Path(drug_adjacency_path)
if my_file.is_file() and my_file2.is_file():
    # file exists
    infile = open(decagon_file_path,'rb')
    drug_degrees_list= pickle.load(infile)
    infile.close()
    infile = open(drug_adjacency_path,'rb')
    drug_drug_adj_list= pickle.load(infile)
    infile.close()
    print("Opened two decagon files from disk")
else:
    drug_degrees_list,drug_drug_adj_list=create_side_effect_matrices()
    outfile = open(decagon_file_path,'wb')
    pickle.dump(drug_degrees_list,outfile)
    outfile.close()
    outfile = open(drug_adjacency_path,'wb')
    pickle.dump(drug_adjacency_path,outfile)
    outfile.close()



i= 0 / 995
i= 1 / 995
i= 2 / 995
i= 3 / 995
i= 4 / 995
i= 5 / 995
i= 6 / 995
i= 7 / 995
i= 8 / 995
i= 9 / 995
i= 10 / 995
i= 11 / 995
i= 12 / 995
i= 13 / 995
i= 14 / 995
i= 15 / 995
i= 16 / 995
i= 17 / 995
i= 18 / 995
i= 19 / 995
i= 20 / 995
i= 21 / 995
i= 22 / 995
i= 23 / 995
i= 24 / 995
i= 25 / 995
i= 26 / 995
i= 27 / 995
i= 28 / 995
i= 29 / 995
i= 30 / 995
i= 31 / 995
i= 32 / 995
i= 33 / 995
i= 34 / 995
i= 35 / 995
i= 36 / 995
i= 37 / 995
i= 38 / 995
i= 39 / 995
i= 40 / 995
i= 41 / 995
i= 42 / 995
i= 43 / 995
i= 44 / 995
i= 45 / 995
i= 46 / 995
i= 47 / 995
i= 48 / 995
i= 49 / 995
i= 50 / 995
i= 51 / 995
i= 52 / 995
i= 53 / 995
i= 54 / 995
i= 55 / 995
i= 56 / 995
i= 57 / 995
i= 58 / 995
i= 59 / 995
i= 60 / 995
i= 61 / 995
i= 62 / 995
i= 63 / 995
i= 64 / 995
i= 65 / 995
i= 66 / 995
i= 67 / 995
i= 68 / 995
i= 69 / 995
i= 70 / 995
i= 71 / 995
i= 72 / 995
i= 73 / 995
i= 74 / 995
i= 75 / 995
i= 76 / 995
i= 77 / 995
i= 78 / 995
i= 79 / 995
i= 80 / 995
i= 81 / 995
i= 82 / 995
i= 83 / 995
i=

i= 639 / 995
i= 640 / 995
i= 641 / 995
i= 642 / 995
i= 643 / 995
i= 644 / 995
i= 645 / 995
i= 646 / 995
i= 647 / 995
i= 648 / 995
i= 649 / 995
i= 650 / 995
i= 651 / 995
i= 652 / 995
i= 653 / 995
i= 654 / 995
i= 655 / 995
i= 656 / 995
i= 657 / 995
i= 658 / 995
i= 659 / 995
i= 660 / 995
i= 661 / 995
i= 662 / 995
i= 663 / 995
i= 664 / 995
i= 665 / 995
i= 666 / 995
i= 667 / 995
i= 668 / 995
i= 669 / 995
i= 670 / 995
i= 671 / 995
i= 672 / 995
i= 673 / 995
i= 674 / 995
i= 675 / 995
i= 676 / 995
i= 677 / 995
i= 678 / 995
i= 679 / 995
i= 680 / 995
i= 681 / 995
i= 682 / 995
i= 683 / 995
i= 684 / 995
i= 685 / 995
i= 686 / 995
i= 687 / 995
i= 688 / 995
i= 689 / 995
i= 690 / 995
i= 691 / 995
i= 692 / 995
i= 693 / 995
i= 694 / 995
i= 695 / 995
i= 696 / 995
i= 697 / 995
i= 698 / 995
i= 699 / 995
i= 700 / 995
i= 701 / 995
i= 702 / 995
i= 703 / 995
i= 704 / 995
i= 705 / 995
i= 706 / 995
i= 707 / 995
i= 708 / 995
i= 709 / 995
i= 710 / 995
i= 711 / 995
i= 712 / 995
i= 713 / 995
i= 714 / 995
i= 715 / 995

ValueError: too many values to unpack (expected 2)

In [24]:
drug_gene_adj = np.zeros((n_drugs,n_genes))
gene_drug_adj = np.zeros((n_genes, n_drugs))

In [25]:
#%%pixie_debugger
for i in range(0,drug_target_protein_data.shape[0]):
    drug = drug_target_protein_data.loc[i][0]
    gene = drug_target_protein_data.loc[i][1]
    if drug in drug_set and gene in gene_idx:
        gene_index = dict_genes.get(gene)
        drug_index = dict_drugs.get(drug)
        print(gene_index, drug_index)
        drug_gene_adj[drug_index][gene_index] = 1.0
        gene_drug_adj[gene_index][drug_index] = 1.0

321 176
139 176
134 176
461 176
495 176
222 176
72 176
92 176
255 176
48 176
241 176
482 176
240 176
118 176
225 176
288 176
64 176
107 176
254 176
197 176
386 176
391 176
136 176
476 176
200 176
10 176
493 176
41 176
400 176
492 176
95 176
396 176
102 176
50 176
100 176
16 176
170 176
184 176
276 176
494 176
453 176
484 176
351 176
7 176
67 176
4 176
123 176
122 176
12 176
55 176
56 176
98 176
49 176
51 176
261 176
248 176
207 176
125 176
124 176
1 176
479 176
324 176
294 176
376 176
253 176
192 176
133 29
0 189
351 189
44 189
116 127
44 161
178 90
127 67
227 67
0 67
319 128
0 128
265 128
127 128
227 128
351 128
349 128
44 128
133 193
324 192
116 20


In [26]:
drug_target_protein_data.shape[0]

18690

In [27]:
adj_mats_orig = {
    (0, 0): [sp.csr_matrix(gene_adj_mat), sp.csr_matrix(gene_adj_mat_t)],
    (0, 1): [sp.csr_matrix(gene_drug_adj)],
    (1, 0): [sp.csr_matrix(drug_gene_adj)],
    (1, 1): drug_drug_adj_list + [x.transpose(copy=True) for x in drug_drug_adj_list],
}

NameError: name 'drug_drug_adj_list' is not defined

In [None]:
degrees = {
    0: [gene_degrees, gene_degrees],
    1: drug_degrees_list + drug_degrees_list,
}
# featureless (genes)
gene_feat = sp.identity(n_genes)
gene_nonzero_feat, gene_num_feat = gene_feat.shape
gene_feat = preprocessing.sparse_to_tuple(gene_feat.tocoo())
# features (drugs)
drug_feat = sp.identity(n_drugs)
drug_nonzero_feat, drug_num_feat = drug_feat.shape
drug_feat = preprocessing.sparse_to_tuple(drug_feat.tocoo())
# data representation
num_feat = {
    0: gene_num_feat,
    1: drug_num_feat,
}
nonzero_feat = {
    0: gene_nonzero_feat,
    1: drug_nonzero_feat,
}
feat = {
    0: gene_feat,
    1: drug_feat,
}
edge_type2dim = {k: [adj.shape for adj in adjs] for k, adjs in adj_mats_orig.items()}
edge_type2decoder = {
    (0, 0): 'bilinear',
    (0, 1): 'bilinear',
    (1, 0): 'bilinear',
    (1, 1): 'dedicom',
}

In [None]:
edge_types = {k: len(v) for k, v in adj_mats_orig.items()}
num_edge_types = sum(edge_types.values())
print("Edge types:", "%d" % num_edge_types)

###########################################################
#
# Settings and placeholders
#
###########################################################

flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_integer('neg_sample_size', 1, 'Negative sample size.')
flags.DEFINE_float('learning_rate', 0.001, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 50, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 64, 'Number of units in hidden layer 1.')
flags.DEFINE_integer('hidden2', 32, 'Number of units in hidden layer 2.')
flags.DEFINE_float('weight_decay', 0, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_float('dropout', 0.1, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('max_margin', 0.1, 'Max margin parameter in hinge loss')
flags.DEFINE_integer('batch_size', 1, 'minibatch size.') # was 512
flags.DEFINE_boolean('bias', True, 'Bias term.')
# Important -- Do not evaluate/print validation performance every iteration as it can take
# substantial amount of time
PRINT_PROGRESS_EVERY = 150

print("Defining placeholders")
placeholders = construct_placeholders(edge_types)

In [None]:
print("Create minibatch iterator")
minibatch = EdgeMinibatchIterator(
    adj_mats=adj_mats_orig,
    feat=feat,
    edge_types=edge_types,
    batch_size=FLAGS.batch_size,
    val_test_size=val_test_size
)

In [None]:
print("Create model")
model = DecagonModel(
    placeholders=placeholders,
    num_feat=num_feat,
    nonzero_feat=nonzero_feat,
    edge_types=edge_types,
    decoders=edge_type2decoder,
)

In [None]:
print("Create optimizer")
with tf.name_scope('optimizer'):
    opt = DecagonOptimizer(
        embeddings=model.embeddings,
        latent_inters=model.latent_inters,
        latent_varies=model.latent_varies,
        degrees=degrees,
        edge_types=edge_types,
        edge_type2dim=edge_type2dim,
        placeholders=placeholders,
        batch_size=FLAGS.batch_size,
        margin=FLAGS.max_margin
    )

In [None]:
print("Initialize session")
sess = tf.Session()
sess.run(tf.global_variables_initializer())
feed_dict = {}

In [None]:
###########################################################
#
# Train model
#
###########################################################

print("Train model")
for epoch in range(FLAGS.epochs):

    minibatch.shuffle()
    itr = 0
    while not minibatch.end():
        print("iteration:", itr)
        # Construct feed dictionary
        feed_dict = minibatch.next_minibatch_feed_dict(placeholders=placeholders)
        feed_dict = minibatch.update_feed_dict(
            feed_dict=feed_dict,
            dropout=FLAGS.dropout,
            placeholders=placeholders)

        t = time.time()

        # Training step: run single weight update
        outs = sess.run([opt.opt_op, opt.cost, opt.batch_edge_type_idx], feed_dict=feed_dict)
        train_cost = outs[1]
        batch_edge_type = outs[2]

        if itr % PRINT_PROGRESS_EVERY == 0:
            val_auc, val_auprc, val_apk = get_accuracy_scores(
                minibatch.val_edges, minibatch.val_edges_false,
                minibatch.idx2edge_type[minibatch.current_edge_type_idx])

            print("Epoch:", "%04d" % (epoch + 1), "Iter:", "%04d" % (itr + 1), "Edge:", "%04d" % batch_edge_type,
                  "train_loss=", "{:.5f}".format(train_cost),
                  "val_roc=", "{:.5f}".format(val_auc), "val_auprc=", "{:.5f}".format(val_auprc),
                  "val_apk=", "{:.5f}".format(val_apk), "time=", "{:.5f}".format(time.time() - t))

        itr += 1

print("Optimization finished!")

In [None]:
for et in range(num_edge_types):
    roc_score, auprc_score, apk_score = get_accuracy_scores(
        minibatch.test_edges, minibatch.test_edges_false, minibatch.idx2edge_type[et])
    print("Edge type=", "[%02d, %02d, %02d]" % minibatch.idx2edge_type[et])
    print("Edge type:", "%04d" % et, "Test AUROC score", "{:.5f}".format(roc_score))
    print("Edge type:", "%04d" % et, "Test AUPRC score", "{:.5f}".format(auprc_score))
    print("Edge type:", "%04d" % et, "Test AP@k score", "{:.5f}".format(apk_score))
    print()
