In [1]:
from __future__ import division
from __future__ import print_function

import time
import argparse
import numpy as np
import os

import torch
import torch.nn.functional as F
import torch.optim as optim
import networkx as nx
from scipy import sparse
from scipy.linalg import fractional_matrix_power

from utils import *
from models import Graphsn_GIN
from dataset_utils import DataLoader

import warnings
warnings.filterwarnings('ignore')

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ""

In [2]:
parser = argparse.ArgumentParser()
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='Disables CUDA training.')
parser.add_argument('--fastmode', action='store_true', default=False,
                    help='Validate during training pass.')
parser.add_argument('--seed', type=int, default=42, help='Random seed.')
parser.add_argument('--epochs', type=int, default=200,
                    help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.001,
                    help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=9e-3,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=64,
                    help='Number of hidden units.')
parser.add_argument('--early_stopping', type=int, default=100)
parser.add_argument('--train_rate', type=float, default=0.6)
parser.add_argument('--val_rate', type=float, default=0.2)
parser.add_argument('--dropout', type=float, default=0.95,
                    help='Dropout rate (1 - keep probability).')
parser.add_argument('--dataset', default='cora', help='Dataset name.')
# parser.add_argument('--split', type=str, default='0', help='Random split number 0-9.')

_StoreAction(option_strings=['--dataset'], dest='dataset', nargs=None, const=None, default='cora', type=None, choices=None, help='Dataset name.', metavar=None)

In [3]:
args = parser.parse_args("")

In [4]:
np.random.seed(args.seed)
torch.manual_seed(args.seed)

<torch._C.Generator at 0x1d7fc636900>

In [5]:
dname = args.dataset
dataset = DataLoader(dname)
data = dataset[0]

train_rate = args.train_rate
val_rate = args.val_rate
percls_trn = int(round(train_rate*len(data.y)/dataset.num_classes))
val_lb = int(round(val_rate*len(data.y)))

permute_masks = random_planetoid_splits
data = permute_masks(data, dataset.num_classes, percls_trn, val_lb)

A_norm, A, X, labels, idx_train, idx_val, idx_test = load_citation_data(data)

In [6]:
G = nx.from_numpy_matrix(A)
feature_dictionary = {}

for i in np.arange(len(labels)):
    feature_dictionary[i] = labels[i]

nx.set_node_attributes(G, feature_dictionary, "attr_name")

In [7]:
sub_graphs = []

for i in np.arange(len(A)):
    s_indexes = []
    for j in np.arange(len(A)):
        s_indexes.append(i)
        if(A[i][j]==1):
            s_indexes.append(j)
    sub_graphs.append(G.subgraph(s_indexes))

subgraph_nodes_list = []

for i in np.arange(len(sub_graphs)):
    subgraph_nodes_list.append(list(sub_graphs[i].nodes))

In [8]:
sub_graphs_adj = []
for index in np.arange(len(sub_graphs)):
    sub_graphs_adj.append(nx.adjacency_matrix(sub_graphs[index]).toarray())

In [9]:
sub_graph_edges = []
for index in np.arange(len(sub_graphs)):
    sub_graph_edges.append(sub_graphs[index].number_of_edges())

In [10]:
new_adj = torch.zeros(A.shape[0], A.shape[0])

for node in np.arange(len(subgraph_nodes_list)):
    sub_adj = sub_graphs_adj[node]
    for neighbors in np.arange(len(subgraph_nodes_list[node])):
        index = subgraph_nodes_list[node][neighbors]
        count = torch.tensor(0).float()
        if(index==node):
            continue
        else:
            c_neighbors = set(subgraph_nodes_list[node]).intersection(subgraph_nodes_list[index])
            if index in c_neighbors:
                nodes_list = subgraph_nodes_list[node]
                sub_graph_index = nodes_list.index(index)
                c_neighbors_list = list(c_neighbors)
                for i, item1 in enumerate(nodes_list):
                    if(item1 in c_neighbors):
                        for item2 in c_neighbors_list:
                            j = nodes_list.index(item2)
                            count += sub_adj[i][j]

            new_adj[node][index] = count/2
            new_adj[node][index] = new_adj[node][index]/(len(c_neighbors)*(len(c_neighbors)-1))
            new_adj[node][index] = new_adj[node][index] * (len(c_neighbors)**1)

In [11]:
features = torch.FloatTensor(X)
labels = torch.LongTensor(labels) 

weight = torch.FloatTensor(new_adj)
weight = weight / weight.sum(1, keepdim=True)

weight = weight + torch.FloatTensor(A)

coeff = weight.sum(1, keepdim=True)
coeff = torch.diag((coeff.T)[0])

weight = weight + coeff

In [12]:
weight = weight.detach().numpy()
weight = np.nan_to_num(weight, nan=0)
adj = torch.FloatTensor(weight)

In [13]:
# Model and optimizer
model = Graphsn_GIN(nfeat=features.shape[1],
                    nhid=args.hidden,
                    nclass=labels.max().item() + 1,
                    dropout=args.dropout)

optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

In [14]:
def train(epoch):
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()

In [15]:
def test():
    model.eval()
    logits = model(features, adj)
    accs, losses, preds = [], [], []
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        pred = logits[mask].max(1)[1]
        acc = accuracy(logits[mask], labels[mask])
        
        loss = F.nll_loss(logits[mask], labels[mask])

        preds.append(pred.detach().cpu())
        accs.append(acc)
        losses.append(loss.detach().cpu())
    return accs, preds, losses

In [16]:
Results0 = []
for i in range(10):
    
    best_val_acc = test_acc = 0
    best_val_loss = float('inf')
    val_loss_history = []
    val_acc_history = []

    for epoch in range(args.epochs):
        train(epoch)

        [train_acc, val_acc, tmp_test_acc], preds, [train_loss, val_loss, tmp_test_loss] = test()

        if val_loss < best_val_loss:
            best_val_acc = val_acc
            best_val_loss = val_loss
            test_acc = tmp_test_acc

        if epoch >= 0:
            val_loss_history.append(val_loss)
            val_acc_history.append(val_acc)
            if args.early_stopping > 0 and epoch > args.early_stopping:
                tmp = torch.tensor(
                    val_loss_history[-(args.early_stopping + 1):-1])
                if val_loss > tmp.mean().item():
                    break

    Results0.append([test_acc, best_val_acc])
    
test_acc_mean, val_acc_mean = np.mean(Results0, axis=0) * 100
test_acc_std = np.sqrt(np.var(Results0, axis=0)[0]) * 100
print(f'test acc mean = {test_acc_mean:.4f} \t test acc std = {test_acc_std:.4f} \t val acc mean = {val_acc_mean:.4f}')

test acc mean = 86.3547 	 test acc std = 0.2485 	 val acc mean = 87.6384
