In [1]:
from keras.layers import Dense, Activation, Dropout, Reshape, concatenate, ReLU, Input
from keras.models import Model, Sequential
from keras.regularizers import l2, l1_l2
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
from keras.layers.normalization import BatchNormalization
from keras.constraints import unit_norm
from keras import optimizers
from keras import regularizers
from keras import initializers
import keras.backend as K
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from scipy.linalg import fractional_matrix_power
import tensorflow as tf
import numpy as np

from utils import *
from dfnets_optimizer import *
from dfnets_layer import DFNets

import warnings
warnings.filterwarnings('ignore')

import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ""

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
X, A, Y = load_data(dataset='cora')
A = np.array(A.todense())

Loading cora dataset...
Dataset has 2708 nodes, 5429 edges, 1433 features.


In [3]:
_, Y_val, _, train_idx, val_idx, test_idx, train_mask = get_splits(Y)
train_idx = np.array(train_idx)
val_idx = np.array(val_idx)
test_idx = np.array(test_idx)
labels = np.argmax(Y, axis=1) + 1

# Normalize X
#X /= X.sum(1).reshape(-1, 1)
X = np.array(X)

In [4]:
Y_train = np.zeros(Y.shape)
labels_train = np.zeros(labels.shape)
Y_train[train_idx] = Y[train_idx]
labels_train[train_idx] = labels[train_idx]

Y_test = np.zeros(Y.shape)
labels_test = np.zeros(labels.shape)
Y_test[test_idx] = Y[test_idx]
labels_test[test_idx] = labels[test_idx]

In [5]:
# Identity matrix for self loop
I = np.matrix(np.eye(A.shape[0]))
A_hat = A + I

# Degree matrix
D_hat = np.array(np.sum(A_hat, axis=0))[0]
D_hat = np.matrix(np.diag(D_hat))

#Laplacian matrix
L = I - (fractional_matrix_power(D_hat, -0.5) * A_hat * fractional_matrix_power(D_hat, -0.5))
L = L - ((lmax(L)/2) * I)

In [6]:
lambda_cut = 0.5

def step(x, a):
    for index in range(len(x)):
        if(x[index] >= a):
            x[index] = float(1)
        else:
            x[index] = float(0)
    return x
    
response = lambda x: step(x, lmax(L)/2 - lambda_cut)

# Since the eigenvalues might change, sample eigenvalue domain uniformly
mu = np.linspace(0, lmax(L), 70)

#AR filter order (decrease radius for larger values)
Ka = 5

#MA filter order
Kb = 3

#for speed make small, for accuracy increase. Should be below 1 if the distributed implementation is used. 
#With the (faster) conj. gradient implementation, any radius is allowed.
radius = 0.90

b, a, rARMA, error = dfnets_coefficients_optimizer(mu, response, Kb, Ka, radius)


ECOS 2.0.7 - (C) embotech GmbH, Zurich Switzerland, 2012-15. Web: www.embotech.com/ECOS

It     pcost       dcost      gap   pres   dres    k/t    mu     step   sigma     IR    |   BT
 0  +0.000e+00  -9.000e-01  +3e+02  9e-01  5e-01  1e+00  2e+00    ---    ---    1  1  - |  -  - 
 1  -1.558e+00  -1.896e+00  +1e+02  3e-01  2e-01  5e-01  7e-01  0.6114  7e-02   2  1  1 |  0  0
 2  -2.220e+01  -2.218e+01  +1e+02  3e-01  1e-01  5e-01  5e-01  0.9890  7e-01   2  2  2 |  0  0
 3  -2.340e-02  +3.835e-02  +1e+01  2e-02  5e-03  1e-01  6e-02  0.9642  9e-02   2  2  2 |  0  0
 4  -2.341e-01  -2.032e-01  +5e+00  2e-03  3e-03  5e-02  2e-02  0.7761  2e-01   2  3  3 |  0  0
 5  +4.778e-01  +4.830e-01  +9e-01  4e-04  5e-04  9e-03  4e-03  0.8274  9e-03   3  3  3 |  0  0
 6  +5.248e-01  +5.273e-01  +7e-01  3e-04  3e-04  5e-03  3e-03  0.4628  4e-01   3  3  3 |  0  0
 7  +6.410e-01  +6.412e-01  +7e-02  3e-05  3e-05  5e-04  3e-04  0.9109  2e-02   3  3  3 |  0  0
 8  +6.464e-01  +6.464e-01  +4e-02  2e-05  2e-

In [7]:
h_zero = np.zeros(L.shape[0])

def L_mult_numerator(coef):
    y = coef.item(0) * np.linalg.matrix_power(L, 0)
    for i in range(1, len(coef)):
        x = np.linalg.matrix_power(L, i)
        y = y + coef.item(i) * x

    return y

def L_mult_denominator(coef):
    y_d = h_zero
    for i in range(0, len(coef)):
        x_d = np.linalg.matrix_power(L, i+1)
        y_d = y_d + coef.item(i) * x_d
    
    return y_d

poly_num = L_mult_numerator(b)
poly_denom = L_mult_denominator(a)

arma_conv_AR = K.constant(poly_denom)
arma_conv_MA = K.constant(poly_num)

In [8]:
def dense_factor(inputs, input_signal, num_nodes, droput):
    
    h_1 = BatchNormalization()(inputs)
    h_1 = DFNets(num_nodes, 
                              arma_conv_AR, 
                              arma_conv_MA, 
                              input_signal, 
                              kernel_initializer=initializers.glorot_normal(seed=1), 
                              kernel_regularizer=l2(9e-2), 
                              kernel_constraint=unit_norm(),
                              use_bias=True,
                              bias_initializer=initializers.glorot_normal(seed=1), 
                              bias_constraint=unit_norm())(h_1)
    h_1 = ReLU()(h_1)
    output = Dropout(droput)(h_1)
    return output

In [9]:
def dense_block(inputs):

    concatenated_inputs = inputs
    
    num_nodes = [8, 16, 32, 64, 128]
    droput = [0.9, 0.9, 0.9, 0.9, 0.9]

    for i in range(5):
        x = dense_factor(concatenated_inputs, inputs, num_nodes[i], droput[i])
        concatenated_inputs = concatenate([concatenated_inputs, x], axis=1)

    return concatenated_inputs

In [10]:
def dense_block_model(x_train):
    
    inputs = Input((x_train.shape[1],))
    
    x = dense_block(inputs)

    predictions = Dense(7, kernel_initializer=initializers.glorot_normal(seed=1), 
                        kernel_regularizer=regularizers.l2(1e-10), 
                        kernel_constraint=unit_norm(), 
                        activity_regularizer=regularizers.l2(1e-10), 
                        use_bias=True, 
                        bias_initializer=initializers.glorot_normal(seed=1), 
                        bias_constraint=unit_norm(), 
                        activation='softmax', name='fc_'+str(1))(x)
    
    model = Model(input=inputs, output=predictions)
    
    model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.002), metrics=['acc'])
    
    return model

In [11]:
model_dense_block = dense_block_model(X)
model_dense_block.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 1433)         0                                            
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 1433)         5732        input_1[0][0]                    
__________________________________________________________________________________________________
df_nets_1 (DFNets)              (None, 8)            22936       batch_normalization_1[0][0]      
__________________________________________________________________________________________________
re_lu_1 (ReLU)                  (None, 8)            0           df_nets_1[0][0]                  
__________________________________________________________________________________________________
dropout_1 

In [12]:
nb_epochs = 200

class_weight = class_weight.compute_class_weight('balanced', np.unique(labels_train), labels_train)
class_weight_dic = dict(enumerate(class_weight))

for epoch in range(nb_epochs):
    model_dense_block.fit(X, Y_train, sample_weight=train_mask, batch_size=A.shape[0], epochs=1, shuffle=False, 
                          class_weight=class_weight_dic, verbose=0)
    Y_pred = model_dense_block.predict(X, batch_size=A.shape[0])
    _, train_acc = evaluate_preds(Y_pred, [Y_train], [train_idx])
    _, test_acc = evaluate_preds(Y_pred, [Y_test], [test_idx])
    print("Epoch: {:04d}".format(epoch), "train_acc= {:.4f}".format(train_acc[0]), "test_acc= {:.4f}".format(test_acc[0]))

Epoch: 0000 train_acc= 0.4100 test_acc= 0.2990
Epoch: 0001 train_acc= 0.6650 test_acc= 0.4790
Epoch: 0002 train_acc= 0.7650 test_acc= 0.5910
Epoch: 0003 train_acc= 0.8050 test_acc= 0.6400
Epoch: 0004 train_acc= 0.8200 test_acc= 0.6750
Epoch: 0005 train_acc= 0.8300 test_acc= 0.7080
Epoch: 0006 train_acc= 0.8300 test_acc= 0.7260
Epoch: 0007 train_acc= 0.8350 test_acc= 0.7360
Epoch: 0008 train_acc= 0.8550 test_acc= 0.7490
Epoch: 0009 train_acc= 0.8750 test_acc= 0.7660
Epoch: 0010 train_acc= 0.8950 test_acc= 0.7830
Epoch: 0011 train_acc= 0.9000 test_acc= 0.8020
Epoch: 0012 train_acc= 0.9100 test_acc= 0.8110
Epoch: 0013 train_acc= 0.9300 test_acc= 0.8160
Epoch: 0014 train_acc= 0.9400 test_acc= 0.8210
Epoch: 0015 train_acc= 0.9500 test_acc= 0.8270
Epoch: 0016 train_acc= 0.9550 test_acc= 0.8290
Epoch: 0017 train_acc= 0.9550 test_acc= 0.8290
Epoch: 0018 train_acc= 0.9600 test_acc= 0.8280
Epoch: 0019 train_acc= 0.9600 test_acc= 0.8310
Epoch: 0020 train_acc= 0.9650 test_acc= 0.8350
Epoch: 0021 t

Epoch: 0175 train_acc= 0.9950 test_acc= 0.8550
Epoch: 0176 train_acc= 0.9950 test_acc= 0.8550
Epoch: 0177 train_acc= 0.9950 test_acc= 0.8520
Epoch: 0178 train_acc= 0.9950 test_acc= 0.8520
Epoch: 0179 train_acc= 0.9950 test_acc= 0.8510
Epoch: 0180 train_acc= 0.9950 test_acc= 0.8510
Epoch: 0181 train_acc= 0.9950 test_acc= 0.8490
Epoch: 0182 train_acc= 0.9950 test_acc= 0.8490
Epoch: 0183 train_acc= 0.9950 test_acc= 0.8480
Epoch: 0184 train_acc= 1.0000 test_acc= 0.8460
Epoch: 0185 train_acc= 1.0000 test_acc= 0.8480
Epoch: 0186 train_acc= 0.9950 test_acc= 0.8480
Epoch: 0187 train_acc= 0.9950 test_acc= 0.8450
Epoch: 0188 train_acc= 0.9950 test_acc= 0.8370
Epoch: 0189 train_acc= 0.9900 test_acc= 0.8300
Epoch: 0190 train_acc= 0.9900 test_acc= 0.8300
Epoch: 0191 train_acc= 0.9900 test_acc= 0.8330
Epoch: 0192 train_acc= 0.9900 test_acc= 0.8380
Epoch: 0193 train_acc= 0.9900 test_acc= 0.8400
Epoch: 0194 train_acc= 0.9900 test_acc= 0.8400
Epoch: 0195 train_acc= 0.9900 test_acc= 0.8430
Epoch: 0196 t