In [1]:
from keras.layers import Dense, Activation, Dropout, ReLU, Input
from keras.models import Model
from keras.regularizers import l2
from keras.optimizers import Adam
from keras.layers.normalization import BatchNormalization
from keras.constraints import unit_norm
from keras import optimizers
from keras import regularizers
from keras import initializers
import keras.backend as K
from sklearn.utils import class_weight
from scipy.linalg import fractional_matrix_power
import numpy as np
import networkx as nx

from utils import *
from gat_layer import GAT

import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)

import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"] = ""

Using TensorFlow backend.


In [2]:
#Read data.
A, X, Y_train, Y_val, Y_test, train_idx, val_idx, test_idx, train_mask, val_mask, test_mask, Y = load_data('cora')
A = np.array(A.todense())

In [3]:
X = normalize_features(X)
X = X.todense()
X = np.array(X)

In [4]:
G = nx.from_numpy_matrix(A)
feature_dictionary = {}

for i in np.arange(len(Y)):
    feature_dictionary[i] = Y[i]

nx.set_node_attributes(G, feature_dictionary, "attr_name")

In [5]:
sub_graphs = []

A_array = A

for i in np.arange(len(A_array)):
    s_indexes = []
    for j in np.arange(len(A_array)):
        s_indexes.append(i)
        if(A_array[i][j]==1):
            s_indexes.append(j)
    sub_graphs.append(G.subgraph(s_indexes))

subgraph_nodes_list = []

for i in np.arange(len(sub_graphs)):
    subgraph_nodes_list.append(list(sub_graphs[i].nodes))

In [6]:
sub_graphs_adj = []
for index in np.arange(len(sub_graphs)):
    sub_graphs_adj.append(nx.adjacency_matrix(sub_graphs[index]).toarray())

In [7]:
sub_graph_edges = []
for index in np.arange(len(sub_graphs)):
    sub_graph_edges.append(sub_graphs[index].number_of_edges())

In [8]:
new_adj = np.zeros((A_array.shape[0], A_array.shape[0]))

for node in np.arange(len(subgraph_nodes_list)):
    sub_adj = sub_graphs_adj[node]
    for neighbors in np.arange(len(subgraph_nodes_list[node])):
        index = subgraph_nodes_list[node][neighbors]
        count = 0
        if(index==node):
            continue
        else:
            c_neighbors = set(subgraph_nodes_list[node]).intersection(subgraph_nodes_list[index])
            if index in c_neighbors:
                nodes_list = subgraph_nodes_list[node]
                sub_graph_index = nodes_list.index(index)
                c_neighbors_list = list(c_neighbors)
                for i, item1 in enumerate(nodes_list):
                    if(item1 in c_neighbors):
                        for item2 in c_neighbors_list:
                            j = nodes_list.index(item2)
                            count += sub_adj[i][j]

            new_adj[node][index] = count/2
            new_adj[node][index] = new_adj[node][index]/(len(c_neighbors)*(len(c_neighbors)-1))
            new_adj[node][index] = new_adj[node][index] * (len(c_neighbors)**1)

In [9]:
labels = np.argmax(Y, axis=1) + 1
labels_train = np.zeros(labels.shape)
labels_train[train_idx] = labels[train_idx]

In [10]:
weight = new_adj / new_adj.sum(axis=1, keepdims=True)

weight = weight + A
coeff = weight.sum(axis=1)
coeff = np.diag(coeff)

weight = weight + coeff
weight = np.nan_to_num(weight, nan=0)

In [11]:
row_sum = np.array(np.sum(weight, axis=1))
degree_matrix = np.matrix(np.diag(row_sum+1))

D = fractional_matrix_power(degree_matrix, -0.5)
adj = D.dot(weight).dot(D)

In [12]:
def att_factor(inputs, num_nodes, dropout):
    
    h_1 = BatchNormalization()(inputs)
    h_1 = GAT(num_nodes, 
              adj, 
              num_attention_heads=8,
              attention_combine='concat', 
              attention_dropout=0.6,
              kernel_initializer=initializers.glorot_normal(seed=1), 
              kernel_regularizer=l2(6e-3),
              kernel_constraint=unit_norm(),
              use_bias=True,
              bias_initializer=initializers.glorot_normal(seed=1), 
              bias_constraint=unit_norm())(h_1)
    h_1 = ReLU()(h_1)
    output = Dropout(dropout)(h_1)
    return output

In [13]:
def att_block(inputs):

    x = inputs
    
    num_nodes = [64]
    dropout = [0.6]

    for i in range(1):
        x = att_factor(x, num_nodes[i], dropout[i])

    return x

In [14]:
def att_block_model(x_train):
    
    inputs = Input((x_train.shape[1],))

    x = att_block(inputs)
    
    predictions = Dense(7, kernel_initializer=initializers.glorot_normal(seed=1), 
                        kernel_regularizer=regularizers.l2(1e-10),
                        kernel_constraint=unit_norm(), 
                        activity_regularizer=regularizers.l2(1e-10),
                        use_bias=True, 
                        bias_initializer=initializers.glorot_normal(seed=1), 
                        bias_constraint=unit_norm(), 
                        activation='softmax', name='fc_'+str(1))(x)

    model = Model(input=inputs, output=predictions)
    
    model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.001), metrics=['acc'])
    
    return model

In [15]:
model = att_block_model(X)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 1433)              0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 1433)              5732      
_________________________________________________________________
gat_1 (GAT)                  (None, 512)               735248    
_________________________________________________________________
re_lu_1 (ReLU)               (None, 512)               0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 512)               0         
_________________________________________________________________
fc_1 (Dense)                 (None, 7)                 3591      
Total params: 744,571
Trainable params: 741,705
Non-trainable params: 2,866
_________________________________________________________________


In [16]:
nb_epochs = 200

class_weight = class_weight.compute_class_weight('balanced', np.unique(labels_train), labels_train)
class_weight_dic = dict(enumerate(class_weight))

for epoch in range(nb_epochs):
    model.fit(X, Y_train, sample_weight=train_mask, batch_size=A.shape[0], epochs=1, shuffle=False, 
              class_weight=class_weight_dic, verbose=0)
    Y_pred = model.predict(X, batch_size=A.shape[0])
    _, train_acc = evaluate_preds(Y_pred, [Y_train], [train_idx])
    _, val_acc = evaluate_preds(Y_pred, [Y_val], [val_idx])
    _, test_acc = evaluate_preds(Y_pred, [Y_test], [test_idx])
    print("Epoch: {:04d}".format(epoch), "train_acc= {:.4f}".format(train_acc[0]), "test_acc= {:.4f}".format(test_acc[0]))

Epoch: 0000 train_acc= 0.1500 test_acc= 0.1162
Epoch: 0001 train_acc= 0.2143 test_acc= 0.1353
Epoch: 0002 train_acc= 0.2643 test_acc= 0.1581
Epoch: 0003 train_acc= 0.2786 test_acc= 0.1728
Epoch: 0004 train_acc= 0.4000 test_acc= 0.2051
Epoch: 0005 train_acc= 0.5286 test_acc= 0.2596
Epoch: 0006 train_acc= 0.6286 test_acc= 0.3419
Epoch: 0007 train_acc= 0.7143 test_acc= 0.4154
Epoch: 0008 train_acc= 0.8286 test_acc= 0.5147
Epoch: 0009 train_acc= 0.8857 test_acc= 0.5846
Epoch: 0010 train_acc= 0.9286 test_acc= 0.6559
Epoch: 0011 train_acc= 0.9429 test_acc= 0.6993
Epoch: 0012 train_acc= 0.9500 test_acc= 0.7331
Epoch: 0013 train_acc= 0.9571 test_acc= 0.7640
Epoch: 0014 train_acc= 0.9571 test_acc= 0.7824
Epoch: 0015 train_acc= 0.9571 test_acc= 0.7912
Epoch: 0016 train_acc= 0.9571 test_acc= 0.8037
Epoch: 0017 train_acc= 0.9643 test_acc= 0.8037
Epoch: 0018 train_acc= 0.9643 test_acc= 0.8103
Epoch: 0019 train_acc= 0.9714 test_acc= 0.8169
Epoch: 0020 train_acc= 0.9714 test_acc= 0.8221
Epoch: 0021 t

Epoch: 0175 train_acc= 1.0000 test_acc= 0.8103
Epoch: 0176 train_acc= 1.0000 test_acc= 0.8059
Epoch: 0177 train_acc= 1.0000 test_acc= 0.8074
Epoch: 0178 train_acc= 1.0000 test_acc= 0.8081
Epoch: 0179 train_acc= 1.0000 test_acc= 0.8110
Epoch: 0180 train_acc= 1.0000 test_acc= 0.8118
Epoch: 0181 train_acc= 1.0000 test_acc= 0.8125
Epoch: 0182 train_acc= 0.9929 test_acc= 0.8118
Epoch: 0183 train_acc= 0.9929 test_acc= 0.8132
Epoch: 0184 train_acc= 0.9929 test_acc= 0.8132
Epoch: 0185 train_acc= 0.9929 test_acc= 0.8154
Epoch: 0186 train_acc= 1.0000 test_acc= 0.8162
Epoch: 0187 train_acc= 1.0000 test_acc= 0.8140
Epoch: 0188 train_acc= 1.0000 test_acc= 0.8125
Epoch: 0189 train_acc= 0.9929 test_acc= 0.8125
Epoch: 0190 train_acc= 0.9929 test_acc= 0.8140
Epoch: 0191 train_acc= 0.9929 test_acc= 0.8132
Epoch: 0192 train_acc= 0.9929 test_acc= 0.8125
Epoch: 0193 train_acc= 0.9929 test_acc= 0.8103
Epoch: 0194 train_acc= 0.9929 test_acc= 0.8110
Epoch: 0195 train_acc= 0.9929 test_acc= 0.8096
Epoch: 0196 t