In [1]:
from __future__ import division
from __future__ import print_function

import time
import argparse
import numpy as np
import os

import torch
import torch.nn.functional as F
import torch.optim as optim
import networkx as nx
from scipy import sparse
from scipy.linalg import fractional_matrix_power

from utils import *
from models import Graphsn_GIN
from dataset_utils import DataLoader

import warnings
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Enumerate CUDA devices in PCI bus order and expose none of them (empty
# device list), which is intended to keep the whole notebook on the CPU.
# NOTE(review): these variables are set after `import torch`; they are only
# honoured if CUDA has not been initialised yet — confirm in your environment.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ""

In [2]:
# Hyper-parameters are declared as command-line style options so the notebook
# shares its configuration surface with a script entry point.
parser = argparse.ArgumentParser()

# Boolean switches (off unless the flag is given).
parser.add_argument(
    '--no-cuda',
    action='store_true',
    default=False,
    help='Disables CUDA training.',
)
parser.add_argument(
    '--fastmode',
    action='store_true',
    default=False,
    help='Validate during training pass.',
)

# Reproducibility and optimisation schedule.
parser.add_argument(
    '--seed',
    type=int,
    default=42,
    help='Random seed.',
)
parser.add_argument(
    '--epochs',
    type=int,
    default=200,
    help='Number of epochs to train.',
)
parser.add_argument(
    '--lr',
    type=float,
    default=0.002,
    help='Initial learning rate.',
)
parser.add_argument(
    '--weight_decay',
    type=float,
    default=9e-3,
    help='Weight decay (L2 loss on parameters).',
)

# Model capacity and regularisation.
parser.add_argument(
    '--hidden',
    type=int,
    default=64,
    help='Number of hidden units.',
)
parser.add_argument(
    '--dropout',
    type=float,
    default=0.88,
    help='Dropout rate (1 - keep probability).',
)

# Which dataset to load.
parser.add_argument(
    '--dataset',
    default='cora',
    help='Dataset name.',
)

_StoreAction(option_strings=['--dataset'], dest='dataset', nargs=None, const=None, default='cora', type=None, choices=None, help='Dataset name.', metavar=None)

In [3]:
# Parse an explicit, empty argument list so every option takes its default and
# the Jupyter kernel's own sys.argv is never consulted.  argparse expects a
# sequence of argument strings, hence [] rather than a bare string.
args = parser.parse_args([])

In [4]:
# Seed NumPy's global RNG and PyTorch's default generator with the same value.
# torch.manual_seed stays last: its returned Generator is the cell's output.
seed = args.seed
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x1fc94ab5900>

In [5]:
# Load the dataset selected by --dataset; the captured output shows the
# Planetoid files being downloaded on first use.
dname = args.dataset
dataset = DataLoader(dname)
# Work with the first element of the loaded dataset.
# NOTE(review): presumably a single graph object — confirm against dataset_utils.
data = dataset[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [6]:
# Unpack the dataset entry into the seven objects used by the following cells.
# NOTE(review): load_citation_data is not imported by name, so it presumably
# comes from `from utils import *`.  Later cells treat A as a square, dense
# adjacency matrix (A.shape, A[i][j]) and idx_* as index sets into the output —
# confirm the exact types against the helper.
A_norm, A, X, labels, idx_train, idx_val, idx_test = load_citation_data(data)

In [8]:
# Build a NetworkX graph from the adjacency matrix A.
# from_numpy_array is used because from_numpy_matrix no longer exists in
# NetworkX 3.0+; for a 2-D ndarray both construct the same graph.
G = nx.from_numpy_array(A)

# Map every node id to its class label and store it on the graph under the
# node attribute "attr_name".
feature_dictionary = {i: labels[i] for i in range(len(labels))}

nx.set_node_attributes(G, feature_dictionary, "attr_name")

In [9]:
# For every node i, take the induced subgraph of G on i together with all
# nodes j for which A[i][j] == 1.
sub_graphs = []

for i in np.arange(len(A)):
    # The node itself is listed exactly once, followed by its neighbours in
    # ascending column order.  G.subgraph() ignores duplicate entries, so a
    # self-loop (A[i][i] == 1) is harmless.
    s_indexes = [i]
    s_indexes.extend(np.flatnonzero(np.asarray(A[i]) == 1))
    sub_graphs.append(G.subgraph(s_indexes))

# Node ids of each subgraph, in that subgraph's own iteration order.
subgraph_nodes_list = [list(sub_graph.nodes) for sub_graph in sub_graphs]

In [10]:
# Dense adjacency matrix of every per-node subgraph, kept in the same order as
# sub_graphs so that sub_graphs_adj[k] belongs to sub_graphs[k].
sub_graphs_adj = [
    nx.adjacency_matrix(sub_graph).toarray() for sub_graph in sub_graphs
]

In [11]:
# Structural coefficient matrix.  For every node and every other member
# `index` of its subgraph, new_adj[node][index] is derived from the block of
# the node's subgraph adjacency restricted to the nodes that the two
# subgraphs have in common.  All other entries stay zero.
new_adj = torch.zeros(A.shape[0], A.shape[0])

for node, nodes_list in enumerate(subgraph_nodes_list):
    sub_adj = sub_graphs_adj[node]
    node_members = set(nodes_list)
    # node id -> row/column position inside sub_adj.  This relies on sub_adj
    # having been built from the same subgraph (and hence the same node
    # order) as nodes_list.
    position = {member: k for k, member in enumerate(nodes_list)}

    for index in nodes_list:
        if index == node:
            continue

        # Nodes shared by both subgraphs.  `index` is always among them: it is
        # being iterated from this node's list and is a member of its own.
        c_neighbors = node_members.intersection(subgraph_nodes_list[index])
        overlap = [position[member] for member in c_neighbors]

        # Sum of the overlap x overlap block of sub_adj.  For a symmetric 0/1
        # adjacency this counts each edge twice, hence the halving below.
        # The sum is taken in the array's own dtype and then cast to float32,
        # which is exact for 0/1 entries.
        block_sum = sub_adj[overlap][:, overlap].sum()
        count = torch.tensor(float(block_sum), dtype=torch.float32)

        n_common = len(c_neighbors)
        coefficient = count / 2
        # Normalise by the number of ordered node pairs in the overlap.  If
        # the overlap has a single node this divides by zero and yields
        # inf/NaN instead of raising.
        coefficient = coefficient / (n_common * (n_common - 1))
        # Scale by the overlap size; the exponent is fixed at 1 here.
        coefficient = coefficient * (n_common ** 1)
        new_adj[node][index] = coefficient

In [15]:
# Tensor versions of the node features and class labels.
features = torch.FloatTensor(X)
labels = torch.LongTensor(labels)

# Row-normalise the structural coefficients (a row whose sum is zero turns
# into NaN at this point) and add the raw adjacency on top.
weight = torch.FloatTensor(new_adj)
weight = weight / weight.sum(dim=1, keepdim=True) + torch.FloatTensor(A)

# Diagonal matrix holding each row's total; adding it gives every node a
# self-connection weighted by the sum of its row.
coeff = torch.diag(weight.sum(dim=1))
weight = weight + coeff

In [16]:
# Replace NaN entries with 0 via NumPy, then wrap the cleaned matrix as the
# float tensor `adj` that is passed to the model.
weight = np.nan_to_num(weight.detach().numpy(), nan=0)
adj = torch.FloatTensor(weight)

In [17]:
# Model and optimizer
model = Graphsn_GIN(nfeat=features.shape[1],
                    nhid=args.hidden,
                    nclass=labels.max().item() + 1,
                    dropout=args.dropout)

optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

In [18]:
def train(epoch):
    """Run one optimisation step on the training nodes and print metrics.

    A single forward pass over ``features``/``adj`` is followed by an NLL loss
    on ``idx_train``, back-propagation and an optimizer step.  Validation loss
    and accuracy on ``idx_val`` are then printed alongside the training
    figures and the wall-clock time of the call.

    Uses the module-level ``model``, ``optimizer``, ``features``, ``adj``,
    ``labels``, ``idx_train``, ``idx_val`` and ``args``.

    Args:
        epoch: Zero-based epoch index; reported as ``epoch + 1``.
    """
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    # NOTE(review): nll_loss expects log-probabilities, so the model presumably
    # ends in log_softmax — confirm in models.Graphsn_GIN.
    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()

    # Nothing below is back-propagated, so gradient tracking is switched off
    # to avoid building an autograd graph that is never used.
    with torch.no_grad():
        if not args.fastmode:
            # Re-run the forward pass in eval mode so the validation metrics
            # come from a separate, evaluation-mode pass.  With --fastmode
            # the training-mode output from above is reused instead.
            model.eval()
            output = model(features, adj)

        loss_val = F.nll_loss(output[idx_val], labels[idx_val])
        acc_val = accuracy(output[idx_val], labels[idx_val])

    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))

In [19]:
def test():
    """Evaluate the model on the test nodes and print loss and accuracy.

    Switches ``model`` to eval mode, runs one forward pass over
    ``features``/``adj`` and reports NLL loss and accuracy on ``idx_test``.
    Uses the module-level ``model``, ``features``, ``adj``, ``labels`` and
    ``idx_test``.
    """
    model.eval()
    # Evaluation only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        output = model(features, adj)
        loss_test = F.nll_loss(output[idx_test], labels[idx_test])
        acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))

In [20]:
# Train model
t_total = time.time()
for epoch in range(args.epochs):
    train(epoch)
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Testing
test()

Epoch: 0001 loss_train: 2.0504 acc_train: 0.1786 loss_val: 1.8746 acc_val: 0.3440 time: 1.0253s
Epoch: 0002 loss_train: 1.9849 acc_train: 0.2071 loss_val: 1.8429 acc_val: 0.4140 time: 0.6827s
Epoch: 0003 loss_train: 1.8407 acc_train: 0.2500 loss_val: 1.8152 acc_val: 0.4720 time: 0.7530s
Epoch: 0004 loss_train: 1.7604 acc_train: 0.2929 loss_val: 1.7882 acc_val: 0.5640 time: 0.7181s
Epoch: 0005 loss_train: 1.7793 acc_train: 0.3643 loss_val: 1.7618 acc_val: 0.6020 time: 0.6423s
Epoch: 0006 loss_train: 1.6856 acc_train: 0.3857 loss_val: 1.7333 acc_val: 0.6380 time: 0.7679s
Epoch: 0007 loss_train: 1.6423 acc_train: 0.4714 loss_val: 1.7020 acc_val: 0.6780 time: 0.6732s
Epoch: 0008 loss_train: 1.5223 acc_train: 0.5143 loss_val: 1.6692 acc_val: 0.7160 time: 0.7310s
Epoch: 0009 loss_train: 1.6090 acc_train: 0.4000 loss_val: 1.6366 acc_val: 0.7300 time: 0.6293s
Epoch: 0010 loss_train: 1.5840 acc_train: 0.4571 loss_val: 1.6031 acc_val: 0.7360 time: 0.6154s
Epoch: 0011 loss_train: 1.3787 acc_train

Epoch: 0087 loss_train: 0.5396 acc_train: 0.7714 loss_val: 0.8238 acc_val: 0.7400 time: 0.9009s
Epoch: 0088 loss_train: 0.4567 acc_train: 0.7929 loss_val: 0.8218 acc_val: 0.7420 time: 0.8356s
Epoch: 0089 loss_train: 0.5112 acc_train: 0.8214 loss_val: 0.8198 acc_val: 0.7480 time: 0.9037s
Epoch: 0090 loss_train: 0.4606 acc_train: 0.8143 loss_val: 0.8178 acc_val: 0.7540 time: 0.8647s
Epoch: 0091 loss_train: 0.4749 acc_train: 0.8071 loss_val: 0.8157 acc_val: 0.7540 time: 0.8950s
Epoch: 0092 loss_train: 0.5906 acc_train: 0.7143 loss_val: 0.8137 acc_val: 0.7580 time: 0.8814s
Epoch: 0093 loss_train: 0.6091 acc_train: 0.7214 loss_val: 0.8119 acc_val: 0.7560 time: 0.8889s
Epoch: 0094 loss_train: 0.4917 acc_train: 0.8143 loss_val: 0.8101 acc_val: 0.7560 time: 0.8671s
Epoch: 0095 loss_train: 0.5069 acc_train: 0.7500 loss_val: 0.8093 acc_val: 0.7580 time: 0.8948s
Epoch: 0096 loss_train: 0.4429 acc_train: 0.8143 loss_val: 0.8081 acc_val: 0.7560 time: 0.8817s
Epoch: 0097 loss_train: 0.5176 acc_train

Epoch: 0173 loss_train: 0.4525 acc_train: 0.7714 loss_val: 0.8362 acc_val: 0.7480 time: 0.7709s
Epoch: 0174 loss_train: 0.3852 acc_train: 0.8214 loss_val: 0.8345 acc_val: 0.7480 time: 0.7420s
Epoch: 0175 loss_train: 0.4723 acc_train: 0.7643 loss_val: 0.8326 acc_val: 0.7500 time: 0.8418s
Epoch: 0176 loss_train: 0.4064 acc_train: 0.8429 loss_val: 0.8311 acc_val: 0.7460 time: 0.7505s
Epoch: 0177 loss_train: 0.4650 acc_train: 0.8000 loss_val: 0.8297 acc_val: 0.7480 time: 0.6732s
Epoch: 0178 loss_train: 0.5356 acc_train: 0.7571 loss_val: 0.8292 acc_val: 0.7500 time: 0.7377s
Epoch: 0179 loss_train: 0.4458 acc_train: 0.7929 loss_val: 0.8282 acc_val: 0.7520 time: 1.0133s
Epoch: 0180 loss_train: 0.4756 acc_train: 0.7714 loss_val: 0.8273 acc_val: 0.7520 time: 0.9674s
Epoch: 0181 loss_train: 0.4965 acc_train: 0.7857 loss_val: 0.8267 acc_val: 0.7560 time: 0.7829s
Epoch: 0182 loss_train: 0.4456 acc_train: 0.7857 loss_val: 0.8258 acc_val: 0.7560 time: 0.7261s
Epoch: 0183 loss_train: 0.4305 acc_train