In [1]:
# Dataset selectors shared by the cells of this file: `cat_name` is a path
# component of the dataset directory, `subcat_name` is a path component of the
# log directory and the only accepted value of the --amz_data option.
cat_name="Men"
subcat_name="Men_also_bought"

In [2]:
import numpy as np
import scipy.sparse as sp
import time
import os

# TODO: clean unused code

class DataLoaderAmazon(object):
    """
    Load the amazon graph data and split its edges for link prediction.

    The loader reads a sparse adjacency matrix (``adj.npz``) and a dense node
    feature matrix (``deep_feats.npy``) from the dataset directory, then
    splits the positive edges 80/10/10 into train, validation and test sets.
    """
    def __init__(self, cat_rel='bought_together'):
        """
        Args:
            cat_rel: name of the relation sub-directory to load; it is joined
                with the module-level ``cat_name`` to form the dataset path.
        """
        super(DataLoaderAmazon, self).__init__()
        self.cat_rel = cat_rel

        self.path_dataset = 'D:/School/FARS/Dataset/'+cat_name+'/'+cat_rel+'/'
        assert os.path.exists(self.path_dataset), \
            'dataset directory not found: ' + self.path_dataset

        print('initializing dataloader...')
        self.init_dataset()

    def init_dataset(self):
        """
        Read the adjacency and feature files and split the positive edges.

        Side effects: reseeds the global numpy RNG with 1234 so the split is
        reproducible, and stores ``adj``, ``features``, ``lower_adj`` and the
        ``{train,val,test}_pos_{labels,r_idx,c_idx}`` arrays on ``self``.
        """
        path_dataset = self.path_dataset
        adj_file = path_dataset + 'adj.npz'
        feats_file = path_dataset + 'deep_feats.npy'
        np.random.seed(1234)

        self.adj = sp.load_npz(adj_file).astype(np.int32)
        self.features = np.load(feats_file)

        # get lower triangle of the adj matrix to avoid duplicate edges
        self.lower_adj = sp.tril(self.adj).tocsr()

        # get positive edges and split them into train, val and test
        pos_r_idx, pos_c_idx = self.lower_adj.nonzero()
        # ravel() (instead of squeeze()) keeps a 1-D vector even when the
        # graph holds a single edge; squeeze() would return a 0-d array.
        pos_labels = np.asarray(self.lower_adj[pos_r_idx, pos_c_idx]).ravel()

        n_pos = pos_labels.shape[0] # number of positive edges
        # shuffle a plain list, as before, so the seeded split stays the same
        perm = list(range(n_pos))
        np.random.shuffle(perm)
        pos_labels, pos_r_idx, pos_c_idx = pos_labels[perm], pos_r_idx[perm], pos_c_idx[perm]
        n_train = int(n_pos*0.80)
        n_val = int(n_pos*0.10)
        val_end = n_train + n_val

        self.train_pos_labels, self.train_pos_r_idx, self.train_pos_c_idx = pos_labels[:n_train], pos_r_idx[:n_train], pos_c_idx[:n_train]
        self.val_pos_labels, self.val_pos_r_idx, self.val_pos_c_idx = pos_labels[n_train:val_end], pos_r_idx[n_train:val_end], pos_c_idx[n_train:val_end]
        self.test_pos_labels, self.test_pos_r_idx, self.test_pos_c_idx = pos_labels[val_end:], pos_r_idx[val_end:], pos_c_idx[val_end:]

    def get_phase(self, phase):
        """
        Build the inputs for one phase ('train', 'valid' or 'test').

        The first half of the phase's positive edges is used as evaluation
        (loss) examples, the second half forms the message-passing graph. One
        negative edge is sampled per evaluation positive.

        Side effect: stores the symmetric adjacency of all positive edges of
        the phase as ``self.full_<phase>_adj``.

        Returns:
            (features, adj, labels, r_idx, c_idx) where ``adj`` is the
            symmetric message-passing adjacency, and ``labels``/``r_idx``/
            ``c_idx`` list the evaluation positives followed by the sampled
            negatives (label 0). The index arrays have an integer dtype.
        """
        print('get phase: {}'.format(phase))
        assert phase in ['train', 'valid', 'test']

        lower_adj = self.lower_adj
        n_nodes = lower_adj.shape[0]

        # get the positive edges

        if phase == 'train':
            pos_labels, pos_r_idx, pos_c_idx = self.train_pos_labels, self.train_pos_r_idx, self.train_pos_c_idx
        elif phase == 'valid':
            pos_labels, pos_r_idx, pos_c_idx = self.val_pos_labels, self.val_pos_r_idx, self.val_pos_c_idx
        elif phase == 'test':
            pos_labels, pos_r_idx, pos_c_idx = self.test_pos_labels, self.test_pos_r_idx, self.test_pos_c_idx

        # build adj matrix
        full_adj = sp.csr_matrix((
                    np.hstack([pos_labels, pos_labels]),
                    (np.hstack([pos_r_idx, pos_c_idx]), np.hstack([pos_c_idx, pos_r_idx]))
                ),
                shape=(n_nodes, n_nodes)
            )
        setattr(self, 'full_{}_adj'.format(phase), full_adj)

        # split the positive edges into the ones used for evaluation and the ones used as message passing
        n_pos = pos_labels.shape[0] # number of positive edges
        n_eval = int(n_pos/2)
        mp_pos_labels, mp_pos_r_idx, mp_pos_c_idx = pos_labels[n_eval:], pos_r_idx[n_eval:], pos_c_idx[n_eval:]
        # these are the positive examples that will be used to compute the loss function
        eval_pos_labels, eval_pos_r_idx, eval_pos_c_idx = pos_labels[:n_eval], pos_r_idx[:n_eval], pos_c_idx[:n_eval]

        # get the negative edges

        print('Sampling negative edges...')
        before = time.time()
        n_train_neg = eval_pos_labels.shape[0] # one negative edge per evaluation positive
        neg_labels = np.zeros((n_train_neg))
        # get the possible indexes to be sampled (basically all indexes if there aren't restrictions)
        poss_nodes = np.arange(n_nodes)

        # node indices are integers: allocate them with the dtype of the
        # positive indices so the concatenated index arrays stay integral
        neg_r_idx = np.zeros((n_train_neg), dtype=pos_r_idx.dtype)
        neg_c_idx = np.zeros((n_train_neg), dtype=pos_c_idx.dtype)

        for i in range(n_train_neg):
            r_idx, c_idx = self.get_negative_training_edge(poss_nodes, poss_nodes.shape[0], lower_adj)
            neg_r_idx[i] = r_idx
            neg_c_idx[i] = c_idx
        print('Sampling done, time elapsed: {}'.format(time.time() - before))

        # build adj matrix
        adj = sp.csr_matrix((
                    np.hstack([mp_pos_labels, mp_pos_labels]),
                    (np.hstack([mp_pos_r_idx, mp_pos_c_idx]), np.hstack([mp_pos_c_idx, mp_pos_r_idx]))
                ),
                shape=(n_nodes, n_nodes)
            )
        # drop any explicitly stored zeros
        adj.eliminate_zeros()

        labels = np.append(eval_pos_labels, neg_labels)
        r_idx = np.append(eval_pos_r_idx, neg_r_idx)
        c_idx = np.append(eval_pos_c_idx, neg_c_idx)

        return self.features, adj, labels, r_idx, c_idx

    def normalize_features(self, feats, get_moments=False, mean=None, std=None):
        """
        Standardize 256-dimensional features column-wise.

        Args:
            feats: 2-D array whose second dimension must be 256.
            get_moments: when True also return the mean and std that were used.
            mean, std: if both are given they are reused instead of being
                computed from ``feats``.

        Raises:
            NotImplementedError: if ``feats.shape[1]`` is not 256.
        """
        reuse_mean = mean is not None and std is not None
        if feats.shape[1] == 256: # image features
            if reuse_mean:
                mean_feats = mean
                std_feats = std
            else:
                mean_feats = feats.mean(axis=0)
                std_feats = feats.std(axis=0)

            # normalize
            feats = (feats - mean_feats)/std_feats

        else:
            raise NotImplementedError()

        if get_moments:
            return feats, mean_feats, std_feats
        return feats

    def get_negative_training_edge(self, poss_nodes, num_nodes, lower_adj):
        """
        Sample one negative edge by rejection sampling.

        A candidate pair is rejected when it is a self-pair (u == v) or when
        ``lower_adj`` stores a non-zero entry for it in either orientation.

        Args:
            poss_nodes: unused, kept for interface compatibility.
            num_nodes: number of nodes to sample indices from.
            lower_adj: lower-triangular adjacency matrix of the positive edges.

        Returns:
            (u, v): two distinct node indices that are not connected.

        Raises:
            ValueError: if fewer than two nodes are available.
        """
        if num_nodes < 2:
            raise ValueError('need at least two nodes to sample a negative edge')

        while True:
            v = np.random.randint(num_nodes)
            u = np.random.randint(num_nodes)

            if u == v:
                # a node paired with itself is not a valid negative example
                continue
            if lower_adj[v, u] == 0 and lower_adj[u, v] == 0:
                return u, v

In [3]:
#model/__init__.py

import tensorflow.compat.v1 as tf
import numpy as np

def weight_variable_he_init(input_dim, output_dim, name):
    """Create (or fetch) an [input_dim, output_dim] variable with MSRA / He init.

    The original implementation used ``tf.contrib.layers.variance_scaling_initializer()``,
    but ``tf.contrib`` is not reachable through ``tensorflow.compat.v1`` on
    TensorFlow 2.x. ``tf.variance_scaling_initializer`` with scale 2, fan-in
    mode and a truncated normal distribution reproduces the contrib defaults
    (factor=2.0, mode='FAN_IN', uniform=False).
    """
    he_initializer = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_in', distribution='truncated_normal')
    return tf.get_variable(name, [input_dim, output_dim],
                           initializer=he_initializer)

def weight_variable_truncated_normal(input_dim, output_dim, name=""):
    """Build an [input_dim, output_dim] weight variable drawn from a
    truncated normal distribution with stddev 0.5 (samples further than
    2 stddev from the mean are redrawn)."""
    weight_shape = [input_dim, output_dim]
    return tf.Variable(tf.truncated_normal(weight_shape, stddev=0.5), name=name)


def weight_variable_random_uniform(input_dim, output_dim=None, name=""):
    """Build a weight variable sampled uniformly from [-r, r].

    With ``output_dim`` the variable is a matrix and r = sqrt(6 / (input_dim +
    output_dim)); without it the variable is a vector of length ``input_dim``
    and r = sqrt(6 / input_dim). The range follows Xavier Glorot and Yoshua
    Bengio: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf"""
    if output_dim is None:
        var_shape = [input_dim]
        fan = input_dim
    else:
        var_shape = [input_dim, output_dim]
        fan = input_dim + output_dim

    limit = np.sqrt(6.0 / fan)
    samples = tf.random_uniform(var_shape, minval=-limit, maxval=limit, dtype=tf.float32)
    return tf.Variable(samples, name=name)


def weight_variable_random_uniform_relu(input_dim, output_dim, name=""):
    """Build an [input_dim, output_dim] weight variable sampled uniformly
    from [-r, r] with r = sqrt(2 / (input_dim + output_dim)).

    Variant of the Glorot/Bengio scheme
    (http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf)
    intended for ReLU activations."""
    limit = np.sqrt(2.0 / (input_dim + output_dim))
    samples = tf.random_uniform(
        [input_dim, output_dim], minval=-limit, maxval=limit, dtype=tf.float32)
    return tf.Variable(samples, name=name)


def bias_variable_truncated_normal(shape, name=""):
    """Build a bias variable of the given shape, drawn from a truncated
    normal distribution with stddev 0.5."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.5), name=name)


def bias_variable_zero(shape, name=""):
    """Build a float32 bias variable of the given shape filled with zeros."""
    return tf.Variable(tf.zeros(shape, dtype=tf.float32), name=name)


def bias_variable_one(shape, name=""):
    """Build a float32 bias variable of the given shape filled with ones."""
    return tf.Variable(tf.ones(shape, dtype=tf.float32), name=name)


def orthogonal(shape, scale=1.1, name=None):
    """
    Orthogonal initialization: a Gaussian matrix is decomposed with an SVD
    and the factor matching the flattened shape is scaled and wrapped in a
    float32 variable.
    From Lasagne. Reference: Saxe et al., http://arxiv.org/abs/1312.6120
    """
    n_rows = shape[0]
    flat_shape = (n_rows, np.prod(shape[1:]))
    gaussian = np.random.normal(0.0, 1.0, flat_shape)
    left, _, right = np.linalg.svd(gaussian, full_matrices=False)

    # keep whichever SVD factor has the flattened target shape
    if left.shape == flat_shape:
        basis = left
    else:
        basis = right
    basis = basis.reshape(shape)
    return tf.Variable(scale * basis[:n_rows, :shape[1]], name=name, dtype=tf.float32)


def bias_variable_const(shape, val, name=""):
    """Build a bias variable of the given shape with every entry set to the
    constant ``val`` (converted to float)."""
    fill_value = tf.to_float(val)
    return tf.Variable(tf.fill(shape, fill_value, name=name), name=name)

In [4]:
#visual-compatibility/model/layers.py

# global unique layer ID dictionary for layer name assignment:
# maps a layer name to the last ID handed out for it by get_layer_uid()
_LAYER_UIDS = {}


def dot(x, y, sparse=False):
    """Matrix product of x and y; uses the sparse-dense matmul when
    ``sparse`` is set and the dense ``tf.matmul`` otherwise."""
    matmul = tf.sparse_tensor_dense_matmul if sparse else tf.matmul
    return matmul(x, y)


def get_layer_uid(layer_name=''):
    """Return the next unique ID for ``layer_name`` (1 on first use) and
    record it in the module-level ``_LAYER_UIDS`` counter.
    """
    next_uid = _LAYER_UIDS.get(layer_name, 0) + 1
    _LAYER_UIDS[layer_name] = next_uid
    return next_uid


class Layer(object):
    """Common base for all layers.

    # Properties
        name: String used as the name scope of the layer. When omitted it is
            derived from the lower-cased class name plus a unique ID.
        logging: Boolean, turns Tensorflow histogram summaries on/off
    # Methods
        _call(inputs): builds the layer computation (identity here)
        __call__(inputs): wraps _call() in a name scope and adds summaries
        _log_vars(): adds a histogram summary for every entry of self.vars
    """
    def __init__(self, **kwargs):
        for key in kwargs:
            assert key in {'name', 'logging'}, 'Invalid keyword argument: ' + key

        layer_name = kwargs.get('name')
        if not layer_name:
            prefix = type(self).__name__.lower()
            layer_name = '{}_{}'.format(prefix, get_layer_uid(prefix))

        self.name = layer_name
        self.vars = {}
        self.logging = kwargs.get('logging', False)
        self.sparse_inputs = False

    def _call(self, inputs):
        # the base layer is the identity
        return inputs

    def __call__(self, input):
        with tf.name_scope(self.name):
            # dense inputs only: sparse inputs are not logged as histograms
            log_input = self.logging and not self.sparse_inputs
            if log_input:
                tf.summary.histogram(self.name + '/input', input)
            result = self._call(input)
            if self.logging:
                tf.summary.histogram(self.name + '/outputs', result)
            return result

    def _log_vars(self):
        for var_name, variable in self.vars.items():
            tf.summary.histogram(self.name + '/vars/' + var_name, variable)


class Dense(Layer):
    """Fully connected layer: dropout, matmul, optional bias, activation and
    optional batch normalization (applied after the activation)."""
    def __init__(self, input_dim, output_dim, is_train, dropout=0., act=tf.nn.relu,
                 bias=False, batch_norm=False, **kwargs):
        super(Dense, self).__init__(**kwargs)

        with tf.variable_scope(self.name + '_vars'):
            weights = weight_variable_random_uniform(input_dim, output_dim, name="weights")
            self.vars['weights'] = weights

            if bias:
                self.vars['node_bias'] = bias_variable_zero([output_dim], name="bias_n")

        self.is_train = is_train
        self.dropout = dropout
        self.act = act
        self.bias = bias
        self.batch_norm = batch_norm

        if self.logging:
            self._log_vars()

    def _call(self, input):
        # the second positional argument of dropout is the keep probability
        hidden = tf.nn.dropout(input, 1 - self.dropout)
        hidden = tf.matmul(hidden, self.vars['weights'])

        # the bias is skipped whenever batch norm is enabled
        use_bias = self.bias and not self.batch_norm
        if use_bias:
            hidden += self.vars['node_bias']

        activated = self.act(hidden)
        if not self.batch_norm:
            return activated

        return tf.layers.batch_normalization(activated, training=self.is_train)

    def __call__(self, input):
        with tf.name_scope(self.name):
            if self.logging:
                tf.summary.histogram(self.name + '/input', input)
            result = self._call(input)
            if self.logging:
                tf.summary.histogram(self.name + '/outputs_n', result)
            return result


class GCN(Layer):
    """Graph convolution layer that sums one propagated term per support
    (adjacency) matrix, each with its own weight matrix."""
    def __init__(self, input_dim, output_dim, support, num_support, is_train, dropout=0.,
                 act=tf.nn.relu, bias=False, batch_norm=False, init='def', **kwargs):
        super(GCN, self).__init__(**kwargs)
        assert init in ['def', 'he']
        with tf.variable_scope(self.name + '_vars'):
            # 'def' -> Glorot-style uniform init, 'he' -> He init
            make_weight = weight_variable_he_init if init != 'def' else weight_variable_random_uniform

            per_support_weights = []
            for idx in range(num_support):
                per_support_weights.append(
                    make_weight(input_dim, output_dim, name='weights_n_%d' % idx))
            self.vars['weights'] = per_support_weights

            if bias:
                self.vars['bias_n'] = bias_variable_zero([output_dim], name="bias_n")

            self.weights = self.vars['weights']

        self.support = support
        self.is_train = is_train
        self.dropout = dropout
        self.act = act
        self.bias = bias
        self.batch_norm = batch_norm

        if self.logging:
            self._log_vars()

    def _call(self, input):
        dropped = tf.nn.dropout(input, 1 - self.dropout)

        # for every support: project the features with that support's weights,
        # then propagate them through the (sparse) support matrix
        propagated = [
            tf.sparse_tensor_dense_matmul(
                self.support[idx],
                dot(dropped, self.weights[idx], sparse=self.sparse_inputs))
            for idx in range(len(self.support))
        ]

        aggregated = tf.add_n(propagated)
        if self.bias:
            aggregated = tf.nn.bias_add(aggregated, self.vars['bias_n'])

        activated = self.act(aggregated)
        if not self.batch_norm:
            return activated

        return tf.layers.batch_normalization(activated, training=self.is_train)

    def __call__(self, input):
        with tf.name_scope(self.name):
            log_input = self.logging and not self.sparse_inputs
            if log_input:
                tf.summary.histogram(self.name + '/input', input)
            result = self._call(input)
            if self.logging:
                tf.summary.histogram(self.name + '/outputs_n', result)
            return result


class MLPDecoder(Layer):
    """
    MLP-based decoder model layer for edge-prediction.

    For every (row, column) index pair the decoder takes the element-wise
    absolute difference of the two node embeddings and maps it through a
    linear layer with ``n_out`` outputs.

    Args:
        num_classes: accepted for interface compatibility; not used here.
        r_indices, c_indices: indices of the two end nodes of each edge.
        input_dim: size of the node embeddings.
        dropout: dropout fraction applied to the node embeddings.
        act: activation applied to the final outputs.
        n_out: number of outputs per edge.
        use_bias: whether a bias is added to the linear layer.
    """
    def __init__(self, num_classes, r_indices, c_indices, input_dim,
                 dropout=0., act=lambda x: x, n_out=1, use_bias=False, **kwargs):
        super(MLPDecoder, self).__init__(**kwargs)

        with tf.variable_scope(self.name + '_vars'):
            self.vars['weights'] = weight_variable_random_uniform(input_dim, n_out, name='weights')
            if use_bias:
                self.vars['bias'] = bias_variable_zero([n_out], name="bias")

        self.r_indices = r_indices
        self.c_indices = c_indices

        self.dropout = dropout
        self.act = act
        self.n_out = n_out
        self.use_bias = use_bias
        if self.logging:
            self._log_vars()

    def _call(self, inputs):
        # the second positional argument of dropout is the keep probability
        node_inputs = tf.nn.dropout(inputs, 1 - self.dropout)

        # r corresponds to the selected rows, and c to the selected columns
        row_inputs = tf.gather(node_inputs, self.r_indices)
        col_inputs = tf.gather(node_inputs, self.c_indices)

        diff = tf.abs(row_inputs - col_inputs)

        outputs = tf.matmul(diff, self.vars['weights'])

        if self.use_bias:
            outputs += self.vars['bias']

        if self.n_out == 1:
            # drop only the output dimension; an unrestricted squeeze would
            # also collapse the edge dimension when a single edge is decoded
            outputs = tf.squeeze(outputs, axis=1)

        outputs = self.act(outputs)

        return outputs

In [5]:
# model/metrics.py
import numpy as np

def softmax_accuracy(preds, labels):
    """
    Accuracy for a multiclass model.
    :param preds: per-class predictions; the arg-max over axis 1 is the predicted class
    :param labels: ground truth labels
    :return: average accuracy
    """
    predicted_class = tf.argmax(preds, 1)
    hits = tf.equal(predicted_class, tf.to_int64(labels))
    return tf.reduce_mean(tf.cast(hits, tf.float32))


def sigmoid_accuracy(preds, labels):
    """
    Accuracy for a binary model that outputs logits.
    :param preds: predictions (logits)
    :param labels: ground truth labels
    :return: average accuracy
    """
    # a logit >= 0 corresponds to sigmoid(logit) >= 0.5
    predicted_positive = tf.cast(preds >= 0.0, tf.int64)
    hits = tf.equal(predicted_positive, tf.to_int64(labels))
    return tf.reduce_mean(tf.cast(hits, tf.float32))


def binary_accuracy(preds, labels):
    """
    Accuracy for a binary model; predictions are thresholded at 0.5.
    :param preds: predictions
    :param labels: ground truth labels
    :return: average accuracy
    """
    predicted_positive = tf.cast(preds >= 0.5, tf.int64)
    hits = tf.equal(predicted_positive, tf.to_int64(labels))
    return tf.reduce_mean(tf.cast(hits, tf.float32))


def softmax_confusion_matrix(preds, labels):
    """
    Confusion matrix of the predictions thresholded at 0. Rows are the real
    labels, columns the predictions.
    """
    thresholded = tf.cast(preds >= 0.0, tf.int32)
    return tf.confusion_matrix(labels, thresholded)

def softmax_cross_entropy(outputs, labels):
    """ mean sparse softmax cross entropy between logits and labels """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=outputs, labels=labels)
    return tf.reduce_mean(per_example)

def sigmoid_cross_entropy(outputs, labels):
    """ mean sigmoid (binary) cross entropy between logits and labels """
    per_example = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=outputs, labels=labels)
    return tf.reduce_mean(per_example)

def binary_cross_entropy(outputs, labels):
    """ mean binary cross entropy of probabilities against labels """
    # keep the probabilities inside [eps, 1 - eps] so log(0) never occurs
    eps = 1e-4
    clipped = tf.clip_by_value(outputs, eps, 1-eps)
    positive_term = labels * -tf.log(clipped)
    negative_term = (1-labels) * -tf.log(1-clipped)
    return tf.reduce_mean(positive_term + negative_term)

In [6]:
# model/CompatibilityGAE.py
# Handles to the TF1 flags module and its global FLAGS container.
# NOTE(review): neither name is referenced in the visible part of this file.
flags = tf.app.flags
FLAGS = flags.FLAGS


class Model(object):
    """Base class for models built as a sequential stack of layers.

    Subclasses set ``self.inputs`` and ``self.optimizer``, fill
    ``self.layers`` in ``_build()`` and implement ``_loss()``,
    ``_accuracy()`` and ``_confmat()``; ``build()`` then wires everything up.

    Keyword args:
        name: variable scope name (defaults to the lower-cased class name).
        logging: flag stored on the model for its layers to use.
        wd: weight-decay factor applied to the L2 loss of the variables in
            the "l2_regularize" collection (0 disables it).
    """
    def __init__(self, **kwargs):
        allowed_kwargs = {'name', 'logging', 'wd'}
        for kwarg in kwargs.keys():
            assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg

        name = kwargs.get('name')
        if not name:
            name = self.__class__.__name__.lower()
        self.name = name

        logging = kwargs.get('logging', False)
        self.logging = logging

        self.placeholders = {}

        self.layers = []
        self.activations = []

        self.inputs = None
        self.outputs = None

        self.loss = 0
        self.total_loss = 0 # to use with weight decay
        self.accuracy = 0
        self.confmat = 0
        self.optimizer = None
        self.opt_op = None
        self.global_step = tf.Variable(0, trainable=False)
        self.wd = kwargs.get('wd', 0.)

    def _build(self):
        raise NotImplementedError

    def build(self):
        """ Wrapper for _build(): stacks the layers, builds the metrics and
        creates the optimization op. """
        with tf.variable_scope(self.name):
            self._build()

        # Build sequential layer model
        self.activations.append(self.inputs)
        for layer in self.layers:
            hidden = layer(self.activations[-1])
            self.activations.append(hidden)
        self.outputs = self.activations[-1]

        # Build metrics
        self._loss()
        self._accuracy()
        self._confmat()

        reg_weights = tf.get_collection("l2_regularize") if self.wd else []
        if reg_weights:
            loss_l2 = tf.add_n([tf.nn.l2_loss(v) for v in reg_weights]) * self.wd
            self.total_loss = self.loss + loss_l2
        else:
            if self.wd:
                # tf.add_n rejects an empty list, so an empty collection
                # cannot contribute a regularization term
                print('Warning: weight decay requested but the "l2_regularize" '
                      'collection is empty; no L2 term is added.')
            self.total_loss = self.loss

        # run the collected update ops (e.g. from batch normalization)
        # together with every optimization step
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.opt_op = self.optimizer.minimize(self.total_loss, global_step=self.global_step)

    def predict(self):
        pass

    def _loss(self):
        raise NotImplementedError

    def _accuracy(self):
        raise NotImplementedError

    def _confmat(self):
        # build() calls this hook, so it is part of the subclass contract
        raise NotImplementedError

class CompatibilityGAE(Model):
    """Graph auto-encoder: a stack of GCN layers as the encoder followed by
    an MLP decoder that scores the selected node pairs.

    ``multi`` is accepted but not used by this class.
    """
    def __init__(self, placeholders, input_dim, num_classes, num_support,
                 learning_rate, hidden, batch_norm=False,
                 multi=False, init='def', **kwargs):
        super(CompatibilityGAE, self).__init__(**kwargs)

        # attribute name -> placeholder key, read in this order
        placeholder_attrs = (
            ('inputs', 'node_features'),
            ('support', 'support'),
            ('dropout', 'dropout'),
            ('labels', 'labels'),
            ('r_indices', 'row_indices'),
            ('c_indices', 'col_indices'),
            ('is_train', 'is_train'),
        )
        for attr, key in placeholder_attrs:
            setattr(self, attr, placeholders[key])

        self.input_dim = input_dim
        self.num_classes = num_classes
        self.num_support = num_support
        self.learning_rate = learning_rate
        self.hidden = hidden
        self.batch_norm = batch_norm
        self.init = init

        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.9, beta2=0.999, epsilon=1.e-8)

        self.build()

    def _loss(self):
        """
        Sigmoid cross entropy on the decoder logits (for the mlp decoder).
        """
        self.loss += sigmoid_cross_entropy(self.outputs, self.labels)

        tf.summary.scalar('loss', self.loss)

    def _confmat(self):
        self.confmat += softmax_confusion_matrix(self.outputs, self.labels)

    def _accuracy(self):
        self.accuracy = sigmoid_accuracy(self.outputs, self.labels)

    def predict(self):
        # a logit >= 0 is predicted as the positive class
        return tf.cast(self.outputs >= 0.0, tf.int64)

    def _build(self):
        relu = tf.nn.relu
        in_dim = self.input_dim

        # encoder: one GCN layer per entry of self.hidden
        for out_dim in self.hidden:
            encoder_layer = GCN(input_dim=in_dim,
                                output_dim=out_dim,
                                support=self.support,
                                num_support=self.num_support,
                                act=relu,
                                bias=not self.batch_norm,
                                dropout=self.dropout,
                                logging=self.logging,
                                batch_norm=self.batch_norm,
                                is_train=self.is_train,
                                init=self.init)
            self.layers.append(encoder_layer)
            in_dim = out_dim

        in_dim = self.hidden[-1]

        # decoder: a single linear output per edge, with identity activation
        decoder_layer = MLPDecoder(num_classes=self.num_classes,
                                   r_indices=self.r_indices,
                                   c_indices=self.c_indices,
                                   input_dim=in_dim,
                                   dropout=0.,
                                   act=lambda x: x,
                                   logging=self.logging,
                                   n_out=1,
                                   use_bias=True)
        self.layers.append(decoder_layer)

In [7]:
#  visual-compatibility/utils.py 
import json
import numpy as np
import time
import scipy.sparse as sp
from scipy.sparse import csr_matrix

def construct_feed_dict(placeholders, node_features, support, labels, r_indices, c_indices,
                        dropout, is_train=True):
    """
    Map every placeholder to the value it should be fed with.

    Supports that are not already tuples are converted with
    sparse_to_tuple() first (the check looks at the first support only).
    """
    if type(support[0]) is not tuple:
        support = [sparse_to_tuple(matrix) for matrix in support]

    feed = {placeholders['node_features']: node_features}
    for idx, sup in enumerate(support):
        feed[placeholders['support'][idx]] = sup

    feed[placeholders['labels']] = labels
    feed[placeholders['row_indices']] = r_indices
    feed[placeholders['col_indices']] = c_indices
    feed[placeholders['dropout']] = dropout
    feed[placeholders['is_train']] = is_train

    return feed

def support_dropout(sup, do, edge_drop=False):
    """
    Apply dropout to a support (adjacency) matrix.

    A fraction ``do`` of the nodes is chosen at random and every edge that
    touches one of them is removed. With ``edge_drop`` each remaining edge is
    additionally removed with probability ``do``. Only the lower triangle of
    ``sup`` is processed; the result is that triangle plus its transpose.

    Args:
        sup: sparse square matrix.
        do: dropout fraction, strictly between 0 and 1.
        edge_drop: also drop individual edges.

    Returns:
        The sparse matrix after dropout; ``sup`` itself is not modified.
    """
    sup = sp.tril(sup)
    assert 0.0 < do < 1.0
    n_nodes = sup.shape[0]
    # nodes that I want to isolate
    isolate = np.random.choice(n_nodes, int(n_nodes*do), replace=False)
    nnz_rows, nnz_cols = sup.nonzero()

    # mask every edge with at least one end in the selected nodes
    # (np.isin replaces np.in1d, which is deprecated in recent NumPy)
    mask = np.isin(nnz_rows, isolate) | np.isin(nnz_cols, isolate)
    assert mask.shape[0] == sup.data.shape[0]

    sup.data[mask] = 0
    sup.eliminate_zeros()

    if edge_drop:
        prob = np.random.uniform(0, 1, size=sup.data.shape)
        remove = prob < do
        sup.data[remove] = 0
        sup.eliminate_zeros()

    # mirror the lower triangle to get a symmetric matrix again
    sup = sup + sup.transpose()
    return sup

def write_log(data, logfile):
    """Serialize ``data`` as JSON into ``logfile``, overwriting the file."""
    with open(logfile, mode='w') as sink:
        json.dump(data, fp=sink)

def get_degree_supports(adj, k, adj_self_con=False, verbose=True):
    """
    Build the list of support matrices up to degree k.

    Entry 0 is the identity, entry 1 the adjacency (plus the identity when
    ``adj_self_con`` is set), and each further entry marks the off-diagonal
    positions where the next power of ``adj`` equals exactly 1.
    """
    if verbose:
        print('Computing adj matrices up to {}th degree'.format(k))

    n_nodes = adj.shape[0]
    if k == 0: # identity only (no message passing)
        return [sp.identity(n_nodes)]
    assert k > 0

    first_order = adj.astype(np.float64) + adj_self_con*sp.identity(n_nodes)
    supports = [sp.identity(n_nodes), first_order]

    power = adj
    for _ in range(k-1):
        power = power.dot(adj)
        higher_order = (power == 1).astype(np.float64)
        higher_order.setdiag(0)
        higher_order.eliminate_zeros()
        supports.append(higher_order)
    return supports

def normalize_nonsym_adj(adj):
    """
    Row-normalize an adjacency matrix: every row is divided by its sum.

    Rows that sum to zero are left as all zeros. Integer-typed matrices are
    supported; their degree vector is converted to float first, because an
    integer array cannot hold the ``inf`` marker used for empty rows.

    Args:
        adj: sparse matrix.

    Returns:
        The sparse product D^-1 * adj, D being the diagonal row-sum matrix.
    """
    degree = np.asarray(adj.sum(1)).flatten()
    if not np.issubdtype(degree.dtype, np.floating):
        degree = degree.astype(np.float64)

    # set zeros to inf to avoid dividing by zero (1 / inf == 0)
    degree[degree == 0.] = np.inf

    degree_inv_sqrt = 1. / np.sqrt(degree)
    degree_inv_sqrt_mat = sp.diags([degree_inv_sqrt], [0])

    # D^-1/2 * D^-1/2 == D^-1
    degree_inv = degree_inv_sqrt_mat.dot(degree_inv_sqrt_mat)

    adj_norm = degree_inv.dot(adj)

    return adj_norm

def sparse_to_tuple(sparse_mx):
    """ Convert a sparse matrix into a (coords, values, shape) tuple, the
    format used in the feed_dict for placeholders that represent sparse
    matrices. ``coords`` holds one (row, col) pair per stored value. """
    coo = sparse_mx if sp.isspmatrix_coo(sparse_mx) else sparse_mx.tocoo()
    coords = np.stack([coo.row, coo.col], axis=1)
    return coords, coo.data, coo.shape

class Graph(object):
    """Thin wrapper around a sparse adjacency matrix that samples edges by
    breadth-first search.

    Attributes:
        adj: the adjacency matrix; row ``i`` lists the neighbours of node ``i``.
        n_nodes: number of rows of ``adj``.
        level: initialised to 0; not used by the methods of this class.
    """
    def __init__(self, adj):
        super(Graph, self).__init__()
        self.adj = adj
        self.n_nodes = adj.shape[0]
        self.level = 0

    def run_K_BFS(self, n, K):
        """
        Returns a list of at most K edges, sampled using BFS starting from n.
        Each edge is a (node, neighbour) tuple in discovery order.
        """
        visited = set()
        edges = []
        self.BFS(n, visited, K, edges)
        assert len(edges) <= K

        return edges

    def BFS(self, n, visited, K, edges):
        """
        Breadth-first traversal from ``n`` that appends the traversed edges to
        ``edges`` (in place) and stops as soon as it holds K of them.
        ``visited`` is updated in place with the expanded nodes.
        """
        from collections import deque

        # nothing to collect: without this guard K == 0 would never match
        # the stop condition below and every reachable edge would be added
        if len(edges) >= K:
            return

        # deque gives O(1) pops from the left, unlike list.pop(0)
        queue = deque([n])
        while queue:
            node = queue.popleft()
            if node in visited:
                continue
            visited.add(node)
            for neigh in self.adj[node].nonzero()[1]:
                if neigh not in visited:
                    edges.append((node, neigh))
                    queue.append(neigh)
                if len(edges) == K:
                    return

In [8]:
import sys; sys.argv=['']; del sys
import argparse
import time
import numpy as np
import scipy.sparse as sp
import json
import os
import shutil
# Switch TensorFlow to graph mode: the rest of this cell builds placeholders
# and evaluates them through a tf.Session.
# NOTE(review): `tf` is not imported in this cell -- presumably an earlier
# notebook cell imports it; confirm before running this cell on its own.
tf.disable_eager_execution()

# Set random seed
# The seed comes from the wall clock, so every run is different. It is printed
# and written to results.json at the end of the cell so a run can be repeated
# by hard-coding the value here (12342 is presumably an earlier fixed seed).
seed = int(time.time()) # 12342
np.random.seed(seed)
tf.set_random_seed(seed)

# Settings
# Command-line style configuration. The first line of this cell resets
# sys.argv to [''], so inside the notebook parse_args() sees no arguments and
# every option takes its default value.
ap = argparse.ArgumentParser("")
ap.add_argument("-d", "--dataset", type=str, default="amazon",
                choices=['amazon'],
                help="Dataset string.")

ap.add_argument("-lr", "--learning_rate", type=float, default=0.01,
                help="Learning rate")

# The help text used to read "Learning rate" (copy/paste slip).
ap.add_argument("-wd", "--weight_decay", type=float, default=0.,
                help="Weight decay")

ap.add_argument("-e", "--epochs", type=int, default=300,
                help="Number training epochs")

ap.add_argument("-hi", "--hidden", type=int, nargs='+', default=[256,128,64],
                help="Number hidden units in the GCN layers.")

ap.add_argument("-do", "--dropout", type=float, default=0.5,
                help="Dropout fraction")

ap.add_argument("-deg", "--degree", type=int, default=1,
                help="Degree of the convolution (Number of supports)")

# Default output folder is derived from the notebook-level cat_name / subcat_name.
ap.add_argument("-sdir", "--summaries_dir", type=str, default="D:/School/FARS/Dataset/"+cat_name+"/"+subcat_name+"/logs",
                help="Directory for saving tensorflow summaries.")

ap.add_argument("-sup_do", "--support_dropout", type=float, default=0.15,
                help="Use dropout on the support matrices, dropping all the connections from some nodes")

ap.add_argument('-ws', '--write_summary', dest='write_summary', default=False,
                help="Option to turn on summary writing", action='store_true')

# -bn / -no_bn write to the same destination and cannot be combined;
# batch norm is on unless -no_bn is given.
fp = ap.add_mutually_exclusive_group(required=False)
fp.add_argument('-bn', '--batch_norm', dest='batch_norm',
                help="Option to turn on batchnorm in GCN layers", action='store_true')
fp.add_argument('-no_bn', '--no_batch_norm', dest='batch_norm',
                help="Option to turn off batchnorm", action='store_false')
ap.set_defaults(batch_norm=True)

# The only accepted relation is the notebook-level subcat_name.
ap.add_argument("-amzd", "--amz_data", type=str, default=subcat_name,
            choices=[subcat_name],
            help="Dataset string.")

# Plain dict of option name -> value, used by the rest of the cell.
args = vars(ap.parse_args())

# Echo the parsed configuration into the cell output
print('Settings:')
print(args, '\n')

# Define parameters
# -- model / optimisation values taken from the parsed arguments
DATASET = args['dataset']
DEGREE = args['degree']
HIDDEN = args['hidden']
BATCH_NORM = args['batch_norm']
NB_EPOCH = args['epochs']
LR = args['learning_rate']
DO = args['dropout']
SUP_DO = args['support_dropout']
# -- logging
WRITESUMMARY = args['write_summary']
SUMMARIESDIR = args['summaries_dir']
VERBOSE = True
# -- values fixed in this notebook
FEATURES = "img"
NUMCLASSES = 2
BN_AS_TRAIN = False           # passed as is_train for the val / test feed dicts
ADJ_SELF_CONNECTIONS = True   # passed as adj_self_con to get_degree_supports

# prepare data_loader
# Only the amazon loader exists; bail out early for anything else.
if DATASET != 'amazon':
    raise NotImplementedError('A data loader for dataset {} does not exist'.format(DATASET))

cat_rel = args['amz_data']
dl = DataLoaderAmazon(cat_rel=cat_rel)
# Each phase returns (features, adjacency, labels, row indices, col indices).
# The features returned for 'valid' and 'test' are discarded; the train
# features are fed to every phase further down.
train_features, adj_train, train_labels, train_r_indices, train_c_indices = dl.get_phase('train')
_, adj_val, val_labels, val_r_indices, val_c_indices = dl.get_phase('valid')
_, adj_test, test_labels, test_r_indices, test_c_indices = dl.get_phase('test')
train_features, mean, std = dl.normalize_features(train_features, get_moments=True)

# Make sure the output directory exists before it is listed or written to.
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(SUMMARIESDIR, exist_ok=True)

# With the relative 'logs/' directory every run gets its own numbered
# sub-folder (the number of entries already present), which then has to be
# created as well.
if SUMMARIESDIR == 'logs/':
    SUMMARIESDIR += str(len(os.listdir(SUMMARIESDIR)))
    os.makedirs(SUMMARIESDIR, exist_ok=True)

# Per-epoch metrics are accumulated in log_data and written to log_file.
# The 'questions' lists are not filled anywhere in this cell.
log_file = SUMMARIESDIR + '/log.json'
log_data = {
    'val':{'loss':[], 'acc':[]},
    'train':{'loss':[], 'acc':[]},
    'questions':{
        'loss':[], 'acc':[],
        'task_acc': [], 'task_acc_cf': [], 'res_task_acc': [],
    },
}

# Build the list of support matrices for every split.
# The former `DATASET != 'amazon'` / `DATASET == 'polyvore'` branches were
# removed: the data-loading step raises NotImplementedError for any dataset
# other than 'amazon', so they could never run, and they referenced names
# (adj_q, res_adj_q) that this cell never defines.
train_support = get_degree_supports(adj_train, DEGREE, adj_self_con=ADJ_SELF_CONNECTIONS)
val_support = get_degree_supports(adj_val, DEGREE, adj_self_con=ADJ_SELF_CONNECTIONS)
test_support = get_degree_supports(adj_test, DEGREE, adj_self_con=ADJ_SELF_CONNECTIONS)

# Row-normalise every support except the first one (the training loop refers
# to index 0 as the self-connections support). The three lists are built with
# the same DEGREE, so they are indexed with the same range.
for i in range(1, len(train_support)):
    train_support[i] = normalize_nonsym_adj(train_support[i])
    val_support[i] = normalize_nonsym_adj(val_support[i])
    test_support[i] = normalize_nonsym_adj(test_support[i])

# Graph inputs. One sparse placeholder is created per support matrix; the
# order in which these ops are created is kept as-is on purpose.
num_support = len(train_support)
placeholders = {
    # 1-D int32 vectors -- presumably the two end points of each edge to score
    'row_indices': tf.placeholder(tf.int32, shape=(None,)),
    'col_indices': tf.placeholder(tf.int32, shape=(None,)),
    # scalars with defaults, so a feed dict may omit them
    'dropout': tf.placeholder_with_default(0., shape=()),
    'weight_decay': tf.placeholder_with_default(0., shape=()),
    'is_train': tf.placeholder_with_default(True, shape=()),
    # fed with the (coords, values, shape) triples built by sparse_to_tuple
    'support': [tf.sparse_placeholder(tf.float32, shape=(None, None)) for sup in range(num_support)],
    # 2-D float matrix of node features
    'node_features': tf.placeholder(tf.float32, shape=(None, None)),
    # 1-D float vector, one label per (row, col) pair -- TODO confirm
    'labels': tf.placeholder(tf.float32, shape=(None,))
}

# Build the model on top of the placeholders. The weight decay value is handed
# over through the `wd` argument; whether the 'weight_decay' placeholder is
# also read by the model cannot be seen from this cell.
model = CompatibilityGAE(placeholders,
                    input_dim=train_features.shape[1],
                    num_classes=NUMCLASSES,
                    num_support=num_support,
                    hidden=HIDDEN,
                    learning_rate=LR,
                    logging=True,
                    batch_norm=BATCH_NORM,
                    wd=args['weight_decay'])

# Feed_dicts for validation and test set stay constant over different update steps
# (the training one has its support entries replaced every epoch when support
# dropout is enabled).
#
# All three splits are fed the same normalised train_features and differ only
# in support matrices, labels and edge indices. The former
# `DATASET != 'amazon'` branch was removed: it was unreachable (data loading
# raises for any other dataset) and referenced names this cell never defines
# (val_features, test_features, q_support, q_labels, ...).
train_feed_dict = construct_feed_dict(placeholders, train_features, train_support,
                    train_labels, train_r_indices, train_c_indices, DO)

# Evaluation runs without dropout and with is_train=BN_AS_TRAIN.
# NOTE(review): in the logged run validation accuracy stays at ~0.50 and the
# validation loss grows while training accuracy improves. With
# BN_AS_TRAIN = False, batch norm uses its moving statistics at evaluation
# time; one possible cause is that the model's optimizer op does not run the
# batch-norm update ops -- confirm in the model code.
val_feed_dict = construct_feed_dict(placeholders, train_features, val_support,
                    val_labels, val_r_indices, val_c_indices, 0., is_train=BN_AS_TRAIN)
test_feed_dict = construct_feed_dict(placeholders, train_features, test_support,
                    test_labels, test_r_indices, test_c_indices, 0., is_train=BN_AS_TRAIN)

# Collect all variables to be logged into summary
merged_summary = tf.summary.merge_all()

# Open the session and initialise every variable of the graph built above.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Summary writers exist only when summary writing was requested.
train_summary_writer = val_summary_writer = None
if WRITESUMMARY:
    train_summary_writer = tf.summary.FileWriter(SUMMARIESDIR + '/train', sess.graph)
    val_summary_writer = tf.summary.FileWriter(SUMMARIESDIR + '/val')

# Trackers for the best results seen so far.
best_val_score = best_train_score = best_epoch_train_score = 0
best_epoch = wait = 0
best_val_loss = np.inf

print('Training...')

# One saver for all "best epoch" checkpoints. Building a new tf.train.Saver on
# every improvement, as was done before, adds another set of save/restore ops
# to the graph each time.
saver = tf.train.Saver()

for epoch in range(NB_EPOCH):
    t = time.time()

    # modify train_feed_dict with support dropout if needed
    if SUP_DO:
        # do not modify the first support, the self-connections one
        for i in range(1, len(train_support)):
            # work on a copy so the stored supports stay intact across epochs
            modified = support_dropout(train_support[i].copy(), SUP_DO, edge_drop=True)
            modified.data[...] = 1 # make it binary to normalize
            modified = normalize_nonsym_adj(modified)
            modified = sparse_to_tuple(modified)
            train_feed_dict.update({placeholders['support'][i]: modified})

    # run one iteration
    # (full-batch: one optimizer step on the whole training feed dict per epoch)
    outs = sess.run([model.opt_op, model.loss, model.accuracy, model.confmat], feed_dict=train_feed_dict)

    train_avg_loss = outs[1]
    train_acc = outs[2]

    # evaluate on the validation split after the update
    val_avg_loss, val_acc, conf = sess.run([model.loss, model.accuracy, model.confmat], feed_dict=val_feed_dict)

    if VERBOSE:
        print("[*] Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(train_avg_loss),
              "train_acc=", "{:.5f}".format(train_acc),
              "val_loss=", "{:.5f}".format(val_avg_loss),
              "val_acc=", "{:.5f}".format(val_acc),
              "\t\ttime=", "{:.5f}".format(time.time() - t))

    # float() turns the numpy scalars into values json can serialise
    log_data['train']['loss'].append(float(train_avg_loss))
    log_data['train']['acc'].append(float(train_acc))
    log_data['val']['loss'].append(float(val_avg_loss))
    log_data['val']['acc'].append(float(val_acc))

    write_log(log_data, log_file)

    # keep a checkpoint of the epoch with the best validation accuracy;
    # best_epoch is the 0-based loop index (the printed epoch number is +1)
    if val_acc > best_val_score:
        best_val_score = val_acc
        best_epoch = epoch
        best_epoch_train_score = train_acc
        save_path = saver.save(sess, "%s/best_epoch.ckpt" % (SUMMARIESDIR))

    if train_acc > best_train_score:
        best_train_score = train_acc

    # summaries are written every second epoch
    if epoch % 2 == 0 and WRITESUMMARY:
        # Train set summary
        summary = sess.run(merged_summary, feed_dict=train_feed_dict)
        train_summary_writer.add_summary(summary, epoch)
        train_summary_writer.flush()

        # Validation set summary
        summary = sess.run(merged_summary, feed_dict=val_feed_dict)
        val_summary_writer.add_summary(summary, epoch)
        val_summary_writer.flush()

# store model
# Final weights, saved under the model name with the global step appended.
saver = tf.train.Saver()
save_path = saver.save(sess, "%s/%s.ckpt" % (SUMMARIESDIR, model.name), global_step=model.global_step)

if VERBOSE:
    print("\nOptimization Finished!")
    print('best validation score =', best_val_score, 'at iteration {}, with a train_score of {}'.format(best_epoch, best_epoch_train_score))

# `args` already holds the parsed options; parsing the command line again for
# every use is unnecessary.
print('\nSETTINGS:\n')
for key, val in sorted(args.items()):
    print(key, val)

print('global seed = ', seed)

# For parsing results from file
# Settings plus the headline numbers of the run; float() converts numpy
# scalars into values json can serialise.
results = args.copy()
results.update({
    'best_val_score': float(best_val_score),
    'best_epoch': best_epoch,
    'best_epoch_train_score': float(best_epoch_train_score),
    'best_train_score': float(best_train_score),
    'seed': seed,
})

print(json.dumps(results))

json_outfile = SUMMARIESDIR + '/' + 'results.json'
with open(json_outfile, 'w') as outfile:
    json.dump(results, outfile)

sess.close()

Settings:
{'dataset': 'amazon', 'learning_rate': 0.01, 'weight_decay': 0.0, 'epochs': 300, 'hidden': [256, 128, 64], 'dropout': 0.5, 'degree': 1, 'summaries_dir': 'D:/School/FARS/Dataset/Men/Men_also_bought/logs', 'support_dropout': 0.15, 'write_summary': False, 'batch_norm': True, 'amz_data': 'Men_also_bought'} 

initializing dataloader...
get phase: train
Sampling negative edges...
Sampling done, time elapsed: 8.231645584106445
get phase: valid
Sampling negative edges...
Sampling done, time elapsed: 0.9850690364837646
get phase: test
Sampling negative edges...
Sampling done, time elapsed: 1.1579179763793945
Computing adj matrices up to 1th degree
Computing adj matrices up to 1th degree
Computing adj matrices up to 1th degree
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Colocations handled automatically by placer.


  n_outputs = tf.layers.batch_normalization(n_outputs, training=self.is_train)


Instructions for updating:
Use `tf.cast` instead.
Training...
[*] Epoch: 0001 train_loss= 0.89660 train_acc= 0.49075 val_loss= 3.33368 val_acc= 0.50012 		time= 2.56440
[*] Epoch: 0002 train_loss= 0.86626 train_acc= 0.61596 val_loss= 3.60583 val_acc= 0.50015 		time= 1.74770
[*] Epoch: 0003 train_loss= 0.89652 train_acc= 0.56759 val_loss= 3.25967 val_acc= 0.50041 		time= 1.74864
[*] Epoch: 0004 train_loss= 0.64313 train_acc= 0.66064 val_loss= 3.61709 val_acc= 0.50315 		time= 2.23709
[*] Epoch: 0005 train_loss= 0.63108 train_acc= 0.67884 val_loss= 4.21360 val_acc= 0.50217 		time= 1.81747
[*] Epoch: 0006 train_loss= 0.59272 train_acc= 0.70171 val_loss= 4.94451 val_acc= 0.50239 		time= 2.05341
[*] Epoch: 0007 train_loss= 0.56435 train_acc= 0.71959 val_loss= 5.58396 val_acc= 0.50256 		time= 1.77487
[*] Epoch: 0008 train_loss= 0.54131 train_acc= 0.73302 val_loss= 6.00556 val_acc= 0.50198 		time= 1.80995
[*] Epoch: 0009 train_loss= 0.52013 train_acc= 0.74806 val_loss= 6.22837 val_acc= 0.50149 