## Introduction

This Jupyter notebook is created to reproduce the results and experiment abalations of paper "SEMI-SUPERVISED CLASSIFICATION WITH GRAPH CONVOLUTIONAL NETWORKS" by Thomas N. Kipf and Max Welling. It was published on "Proceedings of the International Conference on Learning Representations (ICLR)" and can be found at https://arxiv.org/pdf/1609.02907.pdf.

The authors of the paper proposed a semi-supervised classification algorithm based on graph convolutional networks (GCNs). And the original code can be found at https://github.com/tkipf/gcn. In this notebook, we reused most of the code and made a few modifications. 

The main modifications are:
1. We modified the code so that it works in compatibility mode with tensorflow 2.12.0.
2. We put the original code except `train.py` under `src/gcn` directory. And they were modified to be compatible with tensorflow 2.12.0.
3. TODO: ablations

## Data Download

The data is included in the original GCN repository. Thus, there is no need to download it again. It can be found under `gcn/data` directory.

## Data Overview

## Reproducibility Summary

By running this notebook, we are able to reproduce the results of GCN on all three datasets. It achieves the same accuracy as the original paper. TODO: ablations.

In [78]:
from gcn.inits import *
from gcn.utils import *
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)
# flags = tf.compat.v1.flags
# FLAGS = flags.FLAGS

## 1. Layers Class

### 1.1. Helper Functions

In [79]:
# global unique layer ID dictionary for layer name assignment
_LAYER_UIDS = {}


def get_layer_uid(layer_name=''):
    """Helper function, assigns unique layer IDs."""
    if layer_name not in _LAYER_UIDS:
        _LAYER_UIDS[layer_name] = 1
        return 1
    else:
        _LAYER_UIDS[layer_name] += 1
        return _LAYER_UIDS[layer_name]


def sparse_dropout(x, keep_prob, noise_shape):
    """Dropout for sparse tensors."""
    random_tensor = keep_prob
    random_tensor += tf.random_uniform(noise_shape)
    dropout_mask = tf.cast(tf.floor(random_tensor), dtype=tf.bool)
    pre_out = tf.sparse_retain(x, dropout_mask)
    return pre_out * (1./keep_prob)


def dot(x, y, sparse=False):
    """Wrapper for tf.matmul (sparse vs dense)."""
    if sparse:
        res = tf.sparse_tensor_dense_matmul(x, y)
    else:
        res = tf.matmul(x, y)
    return res

### 1.2. Layer Class

In [80]:
class Layer(object):
    """Base layer class. Defines basic API for all layer objects.
    Implementation inspired by keras (http://keras.io).

    # Properties
        name: String, defines the variable scope of the layer.
        logging: Boolean, switches Tensorflow histogram logging on/off

    # Methods
        _call(inputs): Defines computation graph of layer
            (i.e. takes input, returns output)
        __call__(inputs): Wrapper for _call()
        _log_vars(): Log all variables
    """

    def __init__(self, **kwargs):
        allowed_kwargs = {'name', 'logging'}
        for kwarg in kwargs.keys():
            assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg
        name = kwargs.get('name')
        if not name:
            layer = self.__class__.__name__.lower()
            name = layer + '_' + str(get_layer_uid(layer))
        self.name = name
        self.vars = {}
        logging = kwargs.get('logging', False)
        self.logging = logging
        self.sparse_inputs = False

    def _call(self, inputs):
        return inputs

    def __call__(self, inputs):
        with tf.name_scope(self.name):
            if self.logging and not self.sparse_inputs:
                tf.summary.histogram(self.name + '/inputs', inputs)
            outputs = self._call(inputs)
            if self.logging:
                tf.summary.histogram(self.name + '/outputs', outputs)
            return outputs

    def _log_vars(self):
        for var in self.vars:
            tf.summary.histogram(self.name + '/vars/' + var, self.vars[var])

### 1.3. Dense Layer

In [81]:
class Dense(Layer):
    """Dense layer."""
    def __init__(self, input_dim, output_dim, placeholders, dropout=0., sparse_inputs=False,
                 act=tf.nn.relu, bias=False, featureless=False, **kwargs):
        super(Dense, self).__init__(**kwargs)

        if dropout:
            self.dropout = placeholders['dropout']
        else:
            self.dropout = 0.

        self.act = act
        self.sparse_inputs = sparse_inputs
        self.featureless = featureless
        self.bias = bias

        # helper variable for sparse dropout
        self.num_features_nonzero = placeholders['num_features_nonzero']

        with tf.variable_scope(self.name + '_vars'):
            self.vars['weights'] = glorot([input_dim, output_dim],
                                          name='weights')
            if self.bias:
                self.vars['bias'] = zeros([output_dim], name='bias')

        if self.logging:
            self._log_vars()

    def _call(self, inputs):
        x = inputs

        # dropout
        if self.sparse_inputs:
            x = sparse_dropout(x, 1-self.dropout, self.num_features_nonzero)
        else:
            x = tf.nn.dropout(x, 1-self.dropout)

        # transform
        output = dot(x, self.vars['weights'], sparse=self.sparse_inputs)

        # bias
        if self.bias:
            output += self.vars['bias']

        return self.act(output)

### 1.4 Graph Convolutional Layer

In [82]:
class GraphConvolution(Layer):
    """Graph convolution layer."""
    def __init__(self, input_dim, output_dim, placeholders, dropout=0.,
                 sparse_inputs=False, act=tf.nn.relu, bias=False,
                 featureless=False, **kwargs):
        super(GraphConvolution, self).__init__(**kwargs)

        if dropout:
            self.dropout = placeholders['dropout']
        else:
            self.dropout = 0.

        self.act = act
        self.support = placeholders['support']
        self.sparse_inputs = sparse_inputs
        self.featureless = featureless
        self.bias = bias

        # helper variable for sparse dropout
        self.num_features_nonzero = placeholders['num_features_nonzero']

        with tf.variable_scope(self.name + '_vars'):
            for i in range(len(self.support)):
                self.vars['weights_' + str(i)] = glorot([input_dim, output_dim],
                                                        name='weights_' + str(i))
            if self.bias:
                self.vars['bias'] = zeros([output_dim], name='bias')

        if self.logging:
            self._log_vars()

    def _call(self, inputs):
        x = inputs

        # dropout
        if self.sparse_inputs:
            x = sparse_dropout(x, 1-self.dropout, self.num_features_nonzero)
        else:
            x = tf.nn.dropout(x, 1-self.dropout)

        # convolve
        supports = list()
        for i in range(len(self.support)):
            if not self.featureless:
                pre_sup = dot(x, self.vars['weights_' + str(i)],
                              sparse=self.sparse_inputs)
            else:
                pre_sup = self.vars['weights_' + str(i)]
            support = dot(self.support[i], pre_sup, sparse=True)
            supports.append(support)
        output = tf.add_n(supports)

        # bias
        if self.bias:
            output += self.vars['bias']

        return self.act(output)

## 2. Models

In [83]:
from gcn.metrics import *

# flags = tf.compat.v1.flags
# FLAGS = flags.FLAGS

In [84]:
class Model(object):
    def __init__(self, **kwargs):
        allowed_kwargs = {'name', 'logging'}
        for kwarg in kwargs.keys():
            assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg
        name = kwargs.get('name')
        if not name:
            name = self.__class__.__name__.lower()
        self.name = name

        logging = kwargs.get('logging', False)
        self.logging = logging

        self.vars = {}
        self.placeholders = {}

        self.layers = []
        self.activations = []

        self.inputs = None
        self.outputs = None

        self.loss = 0
        self.accuracy = 0
        self.optimizer = None
        self.opt_op = None

    def _build(self):
        raise NotImplementedError

    def build(self):
        """ Wrapper for _build() """
        with tf.variable_scope(self.name):
            self._build()

        # Build sequential layer model
        self.activations.append(self.inputs)
        for layer in self.layers:
            hidden = layer(self.activations[-1])
            self.activations.append(hidden)
        self.outputs = self.activations[-1]

        # Store model variables for easy access
        variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)
        self.vars = {var.name: var for var in variables}

        # Build metrics
        self._loss()
        self._accuracy()

        self.opt_op = self.optimizer.minimize(self.loss)

    def predict(self):
        pass

    def _loss(self):
        raise NotImplementedError

    def _accuracy(self):
        raise NotImplementedError

    def save(self, sess=None):
        if not sess:
            raise AttributeError("TensorFlow session not provided.")
        saver = tf.train.Saver(self.vars)
        save_path = saver.save(sess, "tmp/%s.ckpt" % self.name)
        print("Model saved in file: %s" % save_path)

    def load(self, sess=None):
        if not sess:
            raise AttributeError("TensorFlow session not provided.")
        saver = tf.train.Saver(self.vars)
        save_path = "tmp/%s.ckpt" % self.name
        saver.restore(sess, save_path)
        print("Model restored from file: %s" % save_path)

In [85]:
class MLP(Model):
    def __init__(self, placeholders, input_dim, **kwargs):
        super(MLP, self).__init__(**kwargs)

        self.inputs = placeholders['features']
        self.input_dim = input_dim
        # self.input_dim = self.inputs.get_shape().as_list()[1]  # To be supported in future Tensorflow versions
        self.output_dim = placeholders['labels'].get_shape().as_list()[1]
        self.placeholders = placeholders

        self.optimizer = tf.train.AdamOptimizer(learning_rate=flags_learning_rate)

        self.build()

    def _loss(self):
        # Weight decay loss
        for var in self.layers[0].vars.values():
            self.loss += flags_weight_decay * tf.nn.l2_loss(var)

        # Cross entropy error
        self.loss += masked_softmax_cross_entropy(self.outputs, self.placeholders['labels'],
                                                  self.placeholders['labels_mask'])

    def _accuracy(self):
        self.accuracy = masked_accuracy(self.outputs, self.placeholders['labels'],
                                        self.placeholders['labels_mask'])

    def _build(self):
        self.layers.append(Dense(input_dim=self.input_dim,
                                 output_dim=flags_hidden1,
                                 placeholders=self.placeholders,
                                 act=tf.nn.relu,
                                 dropout=True,
                                 sparse_inputs=True,
                                 logging=self.logging))

        self.layers.append(Dense(input_dim=flags_hidden1,
                                 output_dim=self.output_dim,
                                 placeholders=self.placeholders,
                                 act=lambda x: x,
                                 dropout=True,
                                 logging=self.logging))

    def predict(self):
        return tf.nn.softmax(self.outputs)

In [86]:
class GCN(Model):
    def __init__(self, placeholders, input_dim, **kwargs):
        super(GCN, self).__init__(**kwargs)

        self.inputs = placeholders['features']
        self.input_dim = input_dim
        # self.input_dim = self.inputs.get_shape().as_list()[1]  # To be supported in future Tensorflow versions
        self.output_dim = placeholders['labels'].get_shape().as_list()[1]
        self.placeholders = placeholders

        self.optimizer = flags_optimizer(learning_rate=flags_learning_rate)

        self.build()

    def _loss(self):
        # Weight decay loss
        for var in self.layers[0].vars.values():
            self.loss += flags_weight_decay * tf.nn.l2_loss(var)

        # Cross entropy error
        self.loss += masked_softmax_cross_entropy(self.outputs, self.placeholders['labels'],
                                                  self.placeholders['labels_mask'])

    def _accuracy(self):
        self.accuracy = masked_accuracy(self.outputs, self.placeholders['labels'],
                                        self.placeholders['labels_mask'])

    def _build(self):

        if flags_layers == 2:
            # Paper layer configuration
            self.layers.append(GraphConvolution(input_dim=self.input_dim,
                                                output_dim=flags_hidden1,
                                                placeholders=self.placeholders,
                                                act=flags_act_func,
                                                dropout=True,
                                                sparse_inputs=True,
                                                logging=self.logging))

            self.layers.append(GraphConvolution(input_dim=flags_hidden1,
                                                output_dim=self.output_dim,
                                                placeholders=self.placeholders,
                                                act=lambda x: x,
                                                dropout=True,
                                                logging=self.logging))
        elif flags_layers == 1:
            # Single layer configuration
            self.layers.append(GraphConvolution(input_dim=self.input_dim,
                                                output_dim=self.output_dim,
                                                placeholders=self.placeholders,
                                                act=flags_act_func,
                                                dropout=True,
                                                sparse_inputs=True,
                                                logging=self.logging))
        elif flags_layers == 3:
            # Triple layer configuration
            self.layers.append(GraphConvolution(input_dim=self.input_dim,
                                                output_dim=64,
                                                placeholders=self.placeholders,
                                                act=flags_act_func,
                                                dropout=True,
                                                sparse_inputs=True,
                                                logging=self.logging))

            self.layers.append(GraphConvolution(input_dim=64,
                                                output_dim=flags_hidden1,
                                                placeholders=self.placeholders,
                                                act=flags_act_func,
                                                dropout=True,
                                                logging=self.logging))

            self.layers.append(GraphConvolution(input_dim=flags_hidden1,
                                                output_dim=self.output_dim,
                                                placeholders=self.placeholders,
                                                act=lambda x: x,
                                                dropout=True,
                                                logging=self.logging))

    def predict(self):
        return tf.nn.softmax(self.outputs)

## 3. Train

In [87]:
from __future__ import division
from __future__ import print_function

import time
# import tensorflow as tf
# import tensorflow.compat.v1 as tf
# tf.disable_v2_behavior()

# # Set random seed
# seed = 123
# np.random.seed(seed)
# tf.set_random_seed(seed)


### 3.1 Training Parameters

In [88]:
# Settings
# flags = tf.compat.v1.flags
# FLAGS = flags.FLAGS
flags_dataset = 'cora'# , 'Dataset string.')  # 'cora', 'citeseer', 'pubmed'
flags_model = 'gcn' #, 'Model string.')  # 'gcn', 'gcn_cheby', 'dense'
flags_learning_rate = 0.01 #, 'Initial learning rate.')
flags_epochs = 200 #, 'Number of epochs to train.')
flags_hidden1 = 16 #, 'Number of units in hidden layer 1.')
flags_dropout = 0.5 #, 'Dropout rate (1 - keep probability).')
flags_weight_decay = 5e-4 #, 'Weight for L2 loss on embedding matrix.')
flags_early_stopping = 10 #, 'Tolerance for early stopping (# of epochs).')
flags_max_degree = 3 #, 'Maximum Chebyshev polynomial degree.')
flags_act_func = tf.nn.relu # Activation function: tf.nn.relu, tf.nn.leaky_relu, tf.nn.sigmoid, tf.nn.tanh, tf.nn.elu
flags_optimizer = tf.train.AdamOptimizer # Optimizer: tf.train.AdamOptimizer, tf.train.GradientDescentOptimizer, tf.train.AdadeltaOptimizer, tf.train.RMSPropOptimizer
flags_layers = 2 # layers: 1, 2, 3

### 3.2 Load Data and Train Model

In [89]:
# Define model evaluation function
def evaluate(features, support, labels, mask, placeholders, sess, model):
    t_test = time.time()
    feed_dict_val = construct_feed_dict(features, support, labels, mask, placeholders)
    outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
    return outs_val[0], outs_val[1], (time.time() - t_test)

def train(adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask):
    seed = 123
    np.random.seed(seed)
    tf.set_random_seed(seed)
    
    # Some preprocessing
    features = preprocess_features(features)
    if flags_model == 'gcn':
        support = [preprocess_adj(adj)]
        num_supports = 1
        model_func = GCN
    elif flags_model == 'gcn_cheby':
        support = chebyshev_polynomials(adj, flags_max_degree)
        num_supports = 1 + flags_max_degree
        model_func = GCN
    elif flags_model == 'dense':
        support = [preprocess_adj(adj)]  # Not used
        num_supports = 1
        model_func = MLP
    else:
        raise ValueError('Invalid argument for model: ' + str(flags_model))

    # Define placeholders
    placeholders = {
        'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
        'features': tf.sparse_placeholder(tf.float32, shape=tf.constant(features[2], dtype=tf.int64)),
        'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
        'labels_mask': tf.placeholder(tf.int32),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
    }

    # Create model
    model = model_func(placeholders, input_dim=features[2][1], logging=True)

    # Initialize session
    sess = tf.Session()
    # Init variables
    sess.run(tf.global_variables_initializer())

    cost_val = []

    t_begin = time.time()
    # print("start training '{}'...".format(flags_dataset))
    # Train model
    for epoch in range(flags_epochs):

        t = time.time()
        # Construct feed dictionary
        feed_dict = construct_feed_dict(features, support, y_train, train_mask, placeholders)
        feed_dict.update({placeholders['dropout']: flags_dropout})

        # Training step
        outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)

        # Validation
        cost, acc, duration = evaluate(features, support, y_val, val_mask, placeholders, sess, model)
        cost_val.append(cost)

        # Print results
        # print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
        #     "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
        #     "val_acc=", "{:.5f}".format(acc), "time=", "{:.5f}".format(time.time() - t))
        if epoch % 5 == 0:
            print(".", end="")

        if epoch > flags_early_stopping and cost_val[-1] > np.mean(cost_val[-(flags_early_stopping+1):-1]):
            print("Early stopping...")
            break

    print("")

    # print("Optimization Finished!")

    # print("total train time {:.5f}".format(time.time() - t_begin))
    duration = time.time() - t_begin

    # Testing
    test_cost, test_acc, test_duration = evaluate(features, support, y_test, test_mask, placeholders, sess, model)
    return test_cost, test_acc, duration

    # print("[{}][{}][{}] Test set results:".format(flags_dataset, flags_act_func.__name__, flags_optimizer.__name__),
    #       "cost=", "{:.5f}".format(test_cost),
    #       "accuracy=", "{:.5f}".format(test_acc), "time=", "{:.5f}".format(test_duration))

In [90]:
dataset_list = ['cora', 'citeseer', 'pubmed']
optimizer_list = [tf.train.AdamOptimizer, tf.train.GradientDescentOptimizer, tf.train.AdadeltaOptimizer, tf.train.RMSPropOptimizer]
activation_list = [tf.nn.relu, tf.nn.leaky_relu, tf.nn.sigmoid, tf.nn.tanh, tf.nn.elu]
lr_list = [0.01, 0.99]
layers = [1, 2, 3]
result = {}

""" RESET GLOBALS """
_LAYER_UIDS = {}

# adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(flags_dataset)

for flags_dataset in dataset_list:
    print("=========== {} begin ===========".format(flags_dataset))
    if flags_dataset not in result.keys():
        result[flags_dataset] = {}
    print("+ [{}] activation function trial begin".format(flags_dataset))
    result[flags_dataset]['activation'] = {}
    for flags_act_func in activation_list:
        adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(flags_dataset)
        test_cost, test_acc, test_duration = train(adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask)
        print("+ + [{}][{}] Test set results: cost={cost:.5f}, accuracy={accuracy:.5f}, time={time:.5f}".format(
            flags_dataset, flags_act_func.__name__, cost=test_cost, accuracy=test_acc, time=test_duration))
        result[flags_dataset]['activation'][flags_act_func.__name__] = {"cost": test_cost, "accuracy": test_acc, "time": test_duration}
    # reset activation function
    flags_act_func = tf.nn.relu

    
    print("+ [{}] optimizer trial begin".format(flags_dataset))
    result[flags_dataset]['optimizer'] = {}
    for flags_optimizer in optimizer_list:
        result[flags_dataset]['optimizer'][flags_optimizer.__name__] = []
        for flags_learning_rate in lr_list:
            adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(flags_dataset)
            test_cost, test_acc, test_duration = train(adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask)
            print("+ + [{}][{}][{lr}] Test set results: cost={cost:.5f}, accuracy={accuracy:.5f}, time={time:.5f}".format(
                flags_dataset, flags_optimizer.__name__, cost=test_cost, accuracy=test_acc, time=test_duration, lr=flags_learning_rate))
            result[flags_dataset]['optimizer'][flags_optimizer.__name__].append({"cost": test_cost, "accuracy": test_acc, "time": test_duration})
    # reset optimizer
    flags_optimizer = tf.train.AdamOptimizer
    
    print("+ [{}] layers trial begin".format(flags_dataset))
    result[flags_dataset]['layers'] = {}
    for flags_layers in layers:
        adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(flags_dataset)
        test_cost, test_acc, test_duration = train(adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask)
        print("+ + [{}][{}] Test set results: cost={cost:.5f}, accuracy={accuracy:.5f}, time={time:.5f}".format(
            flags_dataset, flags_layers, cost=test_cost, accuracy=test_acc, time=test_duration))
        result[flags_dataset]['layers'][flags_layers] = {"cost": test_cost, "accuracy": test_acc, "time": test_duration}

    print("=========== {} end ===========".format(flags_dataset))

+ [cora] layers trial begin


  adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))


.....Early stopping...

+ + [cora][1] Test set results: cost=1.92891, accuracy=0.72500, time=1.71725
........................................
+ + [cora][2] Test set results: cost=1.13065, accuracy=0.80500, time=8.49623
........................................
+ + [cora][3] Test set results: cost=0.67888, accuracy=0.81600, time=9.93770
+ [citeseer] layers trial begin


  r_inv = np.power(rowsum, -1).flatten()


...Early stopping...

+ + [citeseer][1] Test set results: cost=1.79088, accuracy=0.53700, time=1.29467


In [None]:
print(result['cora']['optimizer'])

In [None]:
for dataset in result.keys():
    print(dataset)
    for ablation in result[dataset]:
        print("+", ablation)
        if ablation == 'optimizer':
            for opt in result[dataset][ablation]:
                print("+", "+", opt)
                for lr_idx in range(len(result[dataset][ablation][opt])):
                    print("+", "+", "+", "lr={}".format(lr_list[lr_idx]))
                    for key, val in result[dataset][ablation][opt][lr_idx].items():
                        print("+", "+", "+", "+", key, val)
        if ablation == 'activation':
            for act in result[dataset][ablation]:
                print("+", "+", act)
                for key, val in result[dataset][ablation][act].items():
                    print("+", "+", "+", key, val)
        if ablation == "layer":
            for layer in result[dataset][ablation]:
                print("+", "+", layer)
                for key, val in result[dataset][ablation][layer].items():
                    print("+", "+", "+", key, val)

In [None]:
"""
# Load data
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(flags_dataset)

# Some preprocessing
features = preprocess_features(features)
if flags_model == 'gcn':
    support = [preprocess_adj(adj)]
    num_supports = 1
    model_func = GCN
elif flags_model == 'gcn_cheby':
    support = chebyshev_polynomials(adj, flags_max_degree)
    num_supports = 1 + flags_max_degree
    model_func = GCN
elif flags_model == 'dense':
    support = [preprocess_adj(adj)]  # Not used
    num_supports = 1
    model_func = MLP
else:
    raise ValueError('Invalid argument for model: ' + str(flags_model))

# Define placeholders
placeholders = {
    'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
    'features': tf.sparse_placeholder(tf.float32, shape=tf.constant(features[2], dtype=tf.int64)),
    'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
    'labels_mask': tf.placeholder(tf.int32),
    'dropout': tf.placeholder_with_default(0., shape=()),
    'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
}

# Create model
model = model_func(placeholders, input_dim=features[2][1], logging=True)

# Initialize session
sess = tf.Session()


# Define model evaluation function
def evaluate(features, support, labels, mask, placeholders):
    t_test = time.time()
    feed_dict_val = construct_feed_dict(features, support, labels, mask, placeholders)
    outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
    return outs_val[0], outs_val[1], (time.time() - t_test)


# Init variables
sess.run(tf.global_variables_initializer())

cost_val = []

t_begin = time.time()
print("start training...")
# Train model
for epoch in range(flags_epochs):

    t = time.time()
    # Construct feed dictionary
    feed_dict = construct_feed_dict(features, support, y_train, train_mask, placeholders)
    feed_dict.update({placeholders['dropout']: flags_dropout})

    # Training step
    outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)

    # Validation
    cost, acc, duration = evaluate(features, support, y_val, val_mask, placeholders)
    cost_val.append(cost)

    # Print results
    print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
          "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
          "val_acc=", "{:.5f}".format(acc), "time=", "{:.5f}".format(time.time() - t))

    # if epoch > flags_early_stopping and cost_val[-1] > np.mean(cost_val[-(flags_early_stopping+1):-1]):
    #     print("Early stopping...")
    #     break

print("Optimization Finished!")

print("total train time {:.5f}".format(time.time() - t_begin))

# Testing
test_cost, test_acc, test_duration = evaluate(features, support, y_test, test_mask, placeholders)
print("Test set results:", "cost=", "{:.5f}".format(test_cost),
      "accuracy=", "{:.5f}".format(test_acc), "time=", "{:.5f}".format(test_duration))
"""