In [1]:
# util
import numpy as np
import pickle as pkl
import networkx as nx
import scipy.sparse as sp
import sys


def parse_index_file(filename):
    """Parse index file."""
    index = []
    for line in open(filename):
        index.append(int(line.strip()))
    return index


def sample_mask(idx, l):
    """Create mask."""
    mask = np.zeros(l)
    mask[idx] = 1
    return np.array(mask, dtype=np.bool)


def load_data(dataset_str):
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("/home/zihe-leon/Desktop/RobustGCN-master/data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("/home/zihe-leon/Desktop/RobustGCN-master/data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)
    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range-min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y)+500)

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, labels


def sparse_to_tuple(sparse_mx):
    """Convert sparse matrix to tuple representation."""
    def to_tuple(mx):
        if not sp.isspmatrix_coo(mx):
            mx = mx.tocoo()
        coords = np.vstack((mx.row, mx.col)).transpose()
        values = mx.data
        shape = mx.shape
        return coords, values, shape

    if isinstance(sparse_mx, list):
        for i in range(len(sparse_mx)):
            sparse_mx[i] = to_tuple(sparse_mx[i])
    else:
        sparse_mx = to_tuple(sparse_mx)

    return sparse_mx

def perturb_features(features, ratio):
    features = features.toarray()
    pert_idx = np.random.choice(len(features), int(ratio*len(features)))
    perturbed = []
    for row in range(len(features)):
        if row in pert_idx:
            arr = features[row]
            # flip_mask = [0]*int(0.8*len(arr)) + [1]*(len(arr)-int(0.8*len(arr)))
            # np.random.shuffle(flip_mask)
            # flip_mask = np.array(flip_mask, dtype=bool)
            p_idx = np.random.choice(len(arr), int(0.2*len(arr)))
            arr[p_idx] = 1-arr[p_idx]
            # np.logical_not(arr, where=flip_mask, out=arr)    
            perturbed.append(arr)
        else:
            arr = features[row]
            perturbed.append(arr)
    perturbed_features = sp.csr_matrix(perturbed)
    return perturbed_features


def preprocess_features(features):
    """Row-normalize feature matrix and convert to tuple representation"""
    # print(features[0])
    rowsum = np.array(features.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    features = r_mat_inv.dot(features)
    return sparse_to_tuple(features)

def normalize_adj(adj, alpha):
    """Symmetrically normalize adjacency matrix."""
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(rowsum, alpha).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()

def preprocess_adj(adj, alpha):
    """Preprocessing of adjacency matrix for simple GCN model and conversion to tuple representation."""
    adj_normalized = normalize_adj(adj + sp.eye(adj.shape[0]), alpha)
    return sparse_to_tuple(adj_normalized)


def construct_feed_dict(features, support, labels, labels_mask, placeholders, adj):
    """Construct feed dictionary."""
    feed_dict = dict()
    feed_dict.update({placeholders['labels']: labels})
    feed_dict.update({placeholders['labels_mask']: labels_mask})
    feed_dict.update({placeholders['features']: features})
    feed_dict.update({placeholders['support'][i]: support[i] for i in range(len(support))})
    feed_dict.update({placeholders['num_features_nonzero']: features[1].shape})
    return feed_dict

def masked_softmax_cross_entropy(preds, labels, mask):
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=labels)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)
    loss *= mask
    return tf.reduce_mean(loss)


def masked_accuracy(preds, labels, mask):
    correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(labels, 1))
    accuracy_all = tf.cast(correct_prediction, tf.float32)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)
    accuracy_all *= mask
    return tf.reduce_mean(accuracy_all)


In [2]:
# layers
from gcn.inits import *
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

flags = tf.app.flags
FLAGS = flags.FLAGS

_LAYER_UIDS = {}


def get_layer_uid(layer_name=''):
    if layer_name not in _LAYER_UIDS:
        _LAYER_UIDS[layer_name] = 1
        return 1
    else:
        _LAYER_UIDS[layer_name] += 1
        return _LAYER_UIDS[layer_name]


def sparse_dropout(x, keep_prob, noise_shape):
    """Dropout for sparse tensors."""
    random_tensor = keep_prob
    random_tensor += tf.random_uniform(noise_shape)
    dropout_mask = tf.cast(tf.floor(random_tensor), dtype=tf.bool)
    pre_out = tf.sparse_retain(x, dropout_mask)
    return pre_out * (1./keep_prob)


def dot(x, y, sparse=False):
    if sparse:
        res = tf.sparse_tensor_dense_matmul(x, y)
    else:
        res = tf.matmul(x, y)
    return res


class Layer(object):
    def __init__(self, **kwargs):
        allowed_kwargs = {'name', 'logging'}
        for kwarg in kwargs.keys():
            assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg
        name = kwargs.get('name')
        if not name:
            layer = self.__class__.__name__.lower()
            name = layer + '_' + str(get_layer_uid(layer))
        self.name = name
        self.vars = {}
        logging = kwargs.get('logging', False)
        self.logging = logging
        self.sparse_inputs = False

    def _call(self, inputs):
        return inputs

    def __call__(self, inputs):
        with tf.name_scope(self.name):
            if self.logging and not self.sparse_inputs:
                tf.summary.histogram(self.name + '/inputs', inputs)
            outputs = self._call(inputs)
            if self.logging:
                tf.summary.histogram(self.name + '/outputs', outputs)
            return outputs

    def _log_vars(self):
        for var in self.vars:
            tf.summary.histogram(self.name + '/vars/' + var, self.vars[var])

class GGCL_F(Layer):
    """GGCL: the input is feature"""
    def __init__(self, input_dim, output_dim, placeholders, dropout=0.,
                 sparse_inputs=False, bias=False,
                 featureless=False, **kwargs):
        super(GGCL_F, self).__init__(**kwargs)

        if dropout:
            self.dropout = placeholders['dropout']
        else:
            self.dropout = 0.

        self.support = placeholders['support']
        self.sparse_inputs = sparse_inputs
        self.featureless = featureless
        self.bias = bias
        self.output_dim = output_dim
        self.num_features_nonzero = placeholders['num_features_nonzero']

        with tf.variable_scope(self.name + '_vars'):
            self.vars['weights_0'] = glorot([input_dim, output_dim], name='weights_0')
            if self.bias:
                self.vars['bias'] = zeros([output_dim], name='bias')

        if self.logging:
            self._log_vars()

    def _call(self, inputs):
        x = inputs
        if self.sparse_inputs:
            x = sparse_dropout(x, 1-self.dropout, self.num_features_nonzero)
        else:
            x = tf.nn.dropout(x, 1-self.dropout)
        supports = list()
        i = 0
        if not self.featureless:
            pre_sup = dot(x, self.vars['weights_' + str(i)],
                          sparse=self.sparse_inputs)
        else:
            pre_sup = self.vars['weights_' + str(i)]
        support = dot(self.support[i], pre_sup, sparse=True)
        supports.append(support)
        dim = int(self.output_dim / 2)
        mean_vector = tf.nn.elu(tf.slice(pre_sup, [0, 0], [-1, dim]))
        var_vector = tf.nn.relu(tf.slice(pre_sup, [0, dim], [-1, dim]))
        self.vars['mean'] = mean_vector
        self.vars['var'] = var_vector
        node_weight = tf.exp(-var_vector*FLAGS.para_var)
        mean_out = dot(self.support[0], mean_vector * node_weight, sparse=True)
        var_out = dot(self.support[1], var_vector * node_weight * node_weight, sparse=True)
        print("*******************************************************************")
        print(var_vector)
        output = tf.concat([mean_out, var_out], axis=1)
        return output

class GGCL_D(Layer):
    """GGCL: the input is distribution"""
    def __init__(self, input_dim, output_dim, placeholders, dropout=0.,
                 sparse_inputs=False, bias=False,
                 featureless=False, **kwargs):
        super(GGCL_D, self).__init__(**kwargs)

        if dropout:
            self.dropout = placeholders['dropout']
        else:
            self.dropout = 0.
        self.support = placeholders['support']
        self.sparse_inputs = sparse_inputs
        self.featureless = featureless
        self.bias = bias
        self.dim = int(input_dim / 2)
        self.num_features_nonzero = placeholders['num_features_nonzero']

        with tf.variable_scope(self.name + '_vars'):
            self.vars['weights_mean'] = glorot([self.dim, output_dim], name='weights_mean')
            self.vars['weights_var'] = glorot([self.dim, output_dim], name='weights_var')
            if self.bias:
                self.vars['bias'] = zeros([output_dim], name='bias')

        if self.logging:
            self._log_vars()

    def _call(self, inputs):
        x = inputs
        if self.sparse_inputs:
            x = sparse_dropout(x, 1-self.dropout, self.num_features_nonzero)
        else:
            x = tf.nn.dropout(x, 1-self.dropout)
        mean_vector = tf.slice(x, [0, 0], [-1, self.dim])
        var_vector = tf.slice(x, [0, self.dim], [-1, self.dim])
        mean_vector = tf.nn.elu(dot(mean_vector, self.vars['weights_mean']))
        var_vector = tf.nn.relu(dot(var_vector, self.vars['weights_var']))
        node_weight = tf.exp(-var_vector*FLAGS.para_var)
        mean_out = dot(self.support[0], mean_vector * node_weight, sparse=True)
        var_out = dot(self.support[1], var_vector * node_weight * node_weight, sparse=True)
        self.vars['var'] = var_out
        sample_v = tf.random_normal(tf.shape(var_out), 0, 1,
                                    dtype=tf.float32)
        mean_out = mean_out + (tf.math.sqrt(var_out + 1e-8) * sample_v)
        self.vars['mean'] = tf.nn.softmax(mean_out)
        output = mean_out
        return output

2023-04-08 14:09:29.270026: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-08 14:09:29.390014: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-04-08 14:09:29.797269: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/zihe-leon/anaconda3/envs/gcn3/lib/
2023-04-08 14:09:29.797313: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_p

In [3]:
# model
from layers import *
from utils import *

flags = tf.app.flags
FLAGS = flags.FLAGS


class Model(object):
    def __init__(self, **kwargs):
        allowed_kwargs = {'name', 'logging'}
        for kwarg in kwargs.keys():
            assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg
        name = kwargs.get('name')
        if not name:
            name = self.__class__.__name__.lower()
        self.name = name

        logging = kwargs.get('logging', False)
        self.logging = logging

        self.vars = {}
        self.placeholders = {}

        self.layers = []
        self.activations = []

        self.inputs = None
        self.outputs = None

        self.loss = 0
        self.accuracy = 0
        self.optimizer = None
        self.opt_op = None

    def _build(self):
        raise NotImplementedError

    def build(self):
        with tf.variable_scope(self.name):
            self._build()
        self.activations.append(self.inputs)
        for layer in self.layers:
            hidden = layer(self.activations[-1])
            self.activations.append(hidden)
        self.outputs = self.activations[-1]
        variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)
        self.vars = {var.name: var for var in variables}
        self._loss()
        self._accuracy()
        self.opt_op = self.optimizer.minimize(self.loss)

    def predict(self):
        pass

    def _loss(self):
        raise NotImplementedError

    def _accuracy(self):
        raise NotImplementedError

    def save(self, sess=None):
        if not sess:
            raise AttributeError("TensorFlow session not provided.")
        saver = tf.train.Saver(self.vars)
        save_path = saver.save(sess, "tmp/%s.ckpt" % self.name)
        print("Model saved in file: %s" % save_path)

    def load(self, sess=None):
        if not sess:
            raise AttributeError("TensorFlow session not provided.")
        saver = tf.train.Saver(self.vars)
        save_path = "tmp/%s.ckpt" % self.name
        saver.restore(sess, save_path)
        print("Model restored from file: %s" % save_path)

class RGCN(Model):
    def __init__(self, placeholders, input_dim, **kwargs):
        super(RGCN, self).__init__(**kwargs)

        self.inputs = placeholders['features']
        self.input_dim = input_dim
        self.output_dim = placeholders['labels'].get_shape().as_list()[1]
        self.placeholders = placeholders
        self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
        self.build()

    def _loss(self):
        self.loss += FLAGS.para_l2 * (tf.nn.l2_loss(self.layers[0].vars['weights_0']) )
        mean = self.layers[0].vars['mean']
        var = self.layers[0].vars['var']
        KL_divergence = 0.5 * tf.reduce_mean(tf.square(mean) + var - tf.log(1e-8 + var) - 1, 1)
        KL_divergence = tf.reduce_sum(KL_divergence)
        self.loss += FLAGS.para_kl * KL_divergence
        self.vars = self.layers[1].vars['var']
        self.mean = self.layers[1].vars['mean']
        self.loss += masked_softmax_cross_entropy(self.outputs, self.placeholders['labels'],
                                                  self.placeholders['labels_mask'])

    def _accuracy(self):
        mean_vector = tf.slice(self.outputs, [0, 0], [-1, self.output_dim])
        self.accuracy = masked_accuracy(mean_vector, self.placeholders['labels'],
                                        self.placeholders['labels_mask'])

    def _build(self):
        self.layers.append(GGCL_F(input_dim=self.input_dim,
                                            output_dim=FLAGS.hidden,
                                            placeholders=self.placeholders,
                                            dropout=True,
                                            sparse_inputs=True,
                                            logging=self.logging))

        self.layers.append(GGCL_D(input_dim=FLAGS.hidden,
                                            output_dim=self.output_dim,
                                            placeholders=self.placeholders,
                                            dropout=True,
                                            logging=self.logging))

    def predict(self):
        return tf.nn.softmax(self.outputs)


In [4]:
# train
import time
import tensorflow.compat.v1 as tf
# tf.disable_eager_execution()
tf.config.run_functions_eagerly(True)
tf.enable_eager_execution()
from utils import *
from models import RGCN
import random
# Set random seed
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)
random.seed(seed)

# Settings
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('dataset', 'cora', 'Dataset string.')  # 'cora', 'citeseer', 'pubmed'
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 2, 'Number of epochs to train.')
flags.DEFINE_integer('hidden', 32, 'Number of units in hidden layer.')
flags.DEFINE_float('dropout', 0.6, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('para_var', 1, 'Parameter of variance-based attention')
flags.DEFINE_float('para_kl', 5e-4, 'Parameter of kl regularization')
flags.DEFINE_float('para_l2', 5e-4, 'Parameter for l2 loss.')
flags.DEFINE_integer('early_stopping', 20, 'Tolerance for early stopping (# of epochs).')

tf.compat.v1.flags.DEFINE_string('ip','','')
tf.compat.v1.flags.DEFINE_string('stdin','','')
tf.compat.v1.flags.DEFINE_string('control','','')
tf.compat.v1.flags.DEFINE_string('hb','','')




# Load data
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, label = load_data(FLAGS.dataset)
# add noise to the data
features = perturb_features(features, 0)
features = preprocess_features(features)

support = [preprocess_adj(adj, -0.5), preprocess_adj(adj, -1.0)]
placeholders = {
    'support': [tf.sparse_placeholder(tf.float32) for _ in range(2)],
    'features': tf.sparse_placeholder(tf.float32, shape=tf.constant(features[2], dtype=tf.int64)),
    'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
    'labels_mask': tf.placeholder(tf.int32),
    'dropout': tf.placeholder_with_default(0., shape=()),
    'num_features_nonzero': tf.placeholder(tf.int32),
}
model = RGCN(placeholders, input_dim=features[2][1], logging=True)
sess = tf.Session()
def evaluate(features, support, labels, mask, placeholders, adj):
    t_test = time.time()
    feed_dict_val = construct_feed_dict(features, support, labels, mask, placeholders, adj)
    outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
    return outs_val[0], outs_val[1], (time.time() - t_test)

sess.run(tf.global_variables_initializer())
cost_val = []
for epoch in range(FLAGS.epochs):
    t = time.time()
    feed_dict = construct_feed_dict(features, support, y_train, train_mask, placeholders, adj)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
    outs = sess.run([model.opt_op, model.loss, model.accuracy, model.vars], feed_dict=feed_dict)
    cost, _, duration = evaluate(features, support, y_val, val_mask, placeholders, adj)
    cost_val.append(cost)
    # Print results
    print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
          "train_acc=", "{:.5f}".format(outs[2]), "time=", "{:.5f}".format(time.time() - t))
    if epoch > FLAGS.early_stopping and cost_val[-1] > np.mean(cost_val[-(FLAGS.early_stopping+1):-1]):
        print("Early stopping...")
        break
print("Optimization Finished!")

# Testing
_, features, _, _, _, _, _, _, _ = load_data(FLAGS.dataset)
features = preprocess_features(features)
test_cost, test_acc, test_duration = evaluate(features, support, y_test, test_mask, placeholders, adj)
print("Test set results:", "cost=", "{:.5f}".format(test_cost),
      "accuracy=", "{:.5f}".format(test_acc), "time=", "{:.5f}".format(test_duration))

print(tf.Session().run(tf.constant([1,2,3])))

print(tf.Session().run(model.vars))

# print(model.vars.eval(session = tf.Session()))
import tensorflow as tf2

# tf2.print(model.vars, output_stream=sys.stderr)

tensor = tf2.range(10)
tf2.print(tensor)
print("~~~~~~~~~~~~~~~~~~~~~~~~")

UnrecognizedFlagError: Unknown command line flag 'Session.signature_scheme'