In [1]:
import torch
import torch.nn as nn
from torch.nn import init
from torch.autograd import Variable

import argparse
import os

import numpy as np
import time
import random
from sklearn.metrics import f1_score
from collections import defaultdict
import networkx as nx

from encoders import Encoder
from aggregators import MeanAggregator
from model import SupervisedGraphSage
from dataset_utils import DataLoader
from utils import *

In [2]:
# Hyper-parameters are declared argparse-style; the notebook later parses an
# empty argument list, so the defaults declared here are the values in use.
parser = argparse.ArgumentParser()

# Boolean switches (off unless the flag is given).
for _flag, _help in (
    ('--no-cuda', 'Disables CUDA training.'),
    ('--fastmode', 'Validate during training pass.'),
):
    parser.add_argument(_flag, action='store_true', default=False, help=_help)

# Typed options, registered in the same order as before: (flag, type, default, help).
for _flag, _type, _default, _help in (
    ('--seed', int, 42, 'Random seed.'),
    ('--epochs', int, 200, 'Number of epochs to train.'),
    ('--lr', float, 0.015, 'Initial learning rate.'),
    ('--weight_decay', float, 9e-4, 'Weight decay (L2 loss on parameters).'),
    ('--hidden', int, 64, 'Number of hidden units.'),
    ('--num_samples', int, 25, 'Number of samples.'),
):
    parser.add_argument(_flag, type=_type, default=_default, help=_help)
del _flag, _type, _default, _help

# Deliberately left as the cell's final expression: the notebook echoes the
# returned action object, exactly as it did before.
parser.add_argument('--dataset', default='cora', help='Dataset name.')

_StoreAction(option_strings=['--dataset'], dest='dataset', nargs=None, const=None, default='cora', type=None, choices=None, help='Dataset name.', metavar=None)

In [3]:
# There is no real command line inside a notebook: parse an empty argument
# list so that every option takes its declared default.
args = parser.parse_args([])

In [4]:
# Seed NumPy, PyTorch and Python's `random` (in that order) from the single
# --seed option so repeated runs draw the same random numbers.
for _seed_rng in (np.random.seed, torch.manual_seed, random.seed):
    _seed_rng(args.seed)
del _seed_rng

In [5]:
# Dataset name, taken from the --dataset option.
dname = args.dataset
# DataLoader is the project helper imported from dataset_utils; only its first
# item is used from here on.
# NOTE(review): presumably that item is the dataset's single graph object --
# confirm against dataset_utils.
dataset = DataLoader(dname)
data = dataset[0]

In [6]:
# Unpack everything the rest of the notebook works on.  load_citation_data is
# not defined in this notebook (presumably it comes from `from utils import *`
# -- confirm).  Later cells use A as a square adjacency matrix (A[i][j]
# compared against 1), X as the node-feature matrix, labels as one label per
# node, and idx_train / idx_val / idx_test to form the train / val / test sets.
# A_norm is not referenced again in the visible cells.
A_norm, A, X, labels, idx_train, idx_val, idx_test = load_citation_data(data)

In [7]:
# Build a NetworkX graph from the adjacency matrix A.
# `nx.from_numpy_matrix` was removed in NetworkX 3.0; `nx.from_numpy_array`
# (available since NetworkX 2.0) builds the same graph from a 2-D array and
# works on both old and new releases.
G = nx.from_numpy_array(A)

# Store every node's label on the graph under the attribute name "attr_name".
feature_dictionary = {i: labels[i] for i in range(len(labels))}
nx.set_node_attributes(G, feature_dictionary, "attr_name")

In [8]:
# For every node i, gather i itself plus every j with A[i][j] == 1 and keep the
# subgraph of G induced on those nodes.
sub_graphs = []

for i in np.arange(len(A)):
    # i is added exactly once.  It used to be re-appended on every pass of the
    # inner loop (N copies per node); G.subgraph() only cares about the set of
    # nodes, so the resulting subgraph is the same without the duplicates.
    s_indexes = [i]
    # Column indices of the entries equal to 1 in row i, in ascending order.
    s_indexes.extend(np.flatnonzero(A[i] == 1))
    sub_graphs.append(G.subgraph(s_indexes))

# Node ids of each subgraph, in that subgraph's own iteration order; entry k
# belongs to node k.
subgraph_nodes_list = [list(sub_graph.nodes) for sub_graph in sub_graphs]

In [9]:
# Dense adjacency matrix of every neighbourhood subgraph, index-aligned with
# `sub_graphs` (entry k is the matrix of sub_graphs[k]).
sub_graphs_adj = [
    nx.adjacency_matrix(sub_graph).toarray()
    for sub_graph in sub_graphs
]

In [10]:
# Pairwise structural weights between each node and the members of its
# neighbourhood list; starts as an all-zero tensor of shape (N, N), N = A.shape[0].
new_adj = torch.zeros(A.shape[0], A.shape[0])

for node in np.arange(len(subgraph_nodes_list)):
    # Dense adjacency of `node`'s neighbourhood subgraph.
    # NOTE(review): the indexing below assumes its rows/columns are ordered
    # like subgraph_nodes_list[node] -- confirm.
    sub_adj = sub_graphs_adj[node]
    for neighbors in np.arange(len(subgraph_nodes_list[node])):
        # `neighbors` is a position in the neighbourhood list; `index` is the
        # node id stored at that position.
        index = subgraph_nodes_list[node][neighbors]
        # Accumulator, reset for every (node, index) pair.
        count = torch.tensor(0).float()
        if(index==node):
            # No weight is computed for a node paired with itself.
            continue
        else:
            # Node ids that appear in both neighbourhood lists.
            c_neighbors = set(subgraph_nodes_list[node]).intersection(subgraph_nodes_list[index])
            # `index` always comes from node's list, so this only tests whether
            # `index` is also present in its own neighbourhood list.
            if index in c_neighbors:
                nodes_list = subgraph_nodes_list[node]
                # Position of `index` inside nodes_list; not read again in this cell.
                sub_graph_index = nodes_list.index(index)
                c_neighbors_list = list(c_neighbors)
                # Sum sub_adj over every ordered pair (item1, item2) of common
                # nodes, addressing sub_adj by their positions in nodes_list.
                for i, item1 in enumerate(nodes_list):
                    if(item1 in c_neighbors):
                        for item2 in c_neighbors_list:
                            j = nodes_list.index(item2)
                            count += sub_adj[i][j]

            # Final weight = (count / 2) / (k * (k - 1)) * k, with k = len(c_neighbors).
            # When k is 0 or 1 the divisor is 0; tensor division then yields a
            # non-finite entry instead of raising.
            # NOTE(review): presumably the later np.nan_to_num call is what
            # cleans these up -- confirm that is intended.
            new_adj[node][index] = count/2
            new_adj[node][index] = new_adj[node][index]/(len(c_neighbors)*(len(c_neighbors)-1))
            new_adj[node][index] = new_adj[node][index] * (len(c_neighbors)**1)

In [11]:
# 1) Divide every row of the structural weights by its row sum.  A row whose
#    sum is 0 divides by zero here and ends up non-finite.
weight = torch.FloatTensor(new_adj)
weight = weight.div(weight.sum(dim=1, keepdim=True))

# 2) Add the original adjacency matrix on top.
weight = weight.add(torch.FloatTensor(A))

# 3) Add each row's total to its diagonal entry: `coeff` is the diagonal matrix
#    built from the row sums of the current `weight`.
coeff = torch.diag(weight.sum(dim=1, keepdim=True).T[0])
weight = weight.add(coeff)

In [12]:
# Convert the tensor to a NumPy array.  `weight` is rebound to an ndarray here,
# so re-running this cell on its own fails (an ndarray has no .detach()); the
# cell that builds `weight` has to be re-run first.
weight = weight.detach().numpy()
# `adj` is a cleaned copy with NaNs replaced by 0 (np.nan_to_num copies by
# default, so `weight` keeps its NaNs).  With NumPy's defaults, infinities are
# also replaced by very large finite numbers.
adj = np.nan_to_num(weight, nan=0)

In [13]:
# Node features as a plain NumPy array.
feat_data = np.array(X)

# node id -> set of the node ids in that node's neighbourhood subgraph.
# Kept as a defaultdict so unknown keys still yield an empty set.
adj_lists = defaultdict(
    set,
    {i: set(subgraph_nodes_list[i]) for i in np.arange(len(sub_graphs))},
)

In [14]:
# Reshape labels into a column: one row per node, shape (A.shape[0], 1).
labels = labels.reshape(A.shape[0], 1)

In [15]:
# Ids of all nodes, 0 .. A.shape[0]-1, as a LongTensor.  It is passed next to
# the node batch in every encoder / model call in the training cell.
full_nodes = torch.LongTensor(np.arange(A.shape[0]))

In [None]:
# --- Frozen feature table --------------------------------------------------
# The embedding's weights are replaced by the raw features and excluded from
# training (requires_grad=False).
num_nodes, num_features = A.shape[0], feat_data.shape[1]
features = nn.Embedding(num_nodes, num_features)
features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)
feat_data = torch.FloatTensor(feat_data)


# --- Two stacked encoder layers -------------------------------------------
def _layer1_embeddings(nodes):
    """Transposed first-layer encoder output for `nodes`; input to layer 2."""
    return enc1(nodes, full_nodes).t()


agg1 = MeanAggregator(features, cuda=False)
enc1 = Encoder(
    features, num_features, args.hidden, adj_lists, adj, feat_data, agg1,
    cuda=False,
)
agg2 = MeanAggregator(_layer1_embeddings, cuda=False)
enc2 = Encoder(
    _layer1_embeddings, enc1.embed_dim, args.hidden, adj_lists, adj, feat_data, agg2,
    base_model=enc1, cuda=False,
)
# Both layers use the --num_samples setting.
for _encoder in (enc1, enc2):
    _encoder.num_samples = args.num_samples
del _encoder

# --- Classifier on top of the second layer ----------------------------------
# One output class per distinct label value.
num_classes = len(np.unique(labels))
graphsage = SupervisedGraphSage(num_classes, enc2)

def _split_node_ids(split, n_nodes):
    """Return the node ids selected by `split` as a fresh 1-D int64 array.

    `split` is either a per-node mask (bool dtype, or n_nodes entries that are
    all 0/1) or a tensor that already holds node ids.  The result is always a
    copy, so shuffling it in place never touches `split`.
    """
    split = torch.as_tensor(split).detach().cpu().reshape(-1)
    is_mask = split.dtype == torch.bool or (
        split.numel() == n_nodes and bool(((split == 0) | (split == 1)).all())
    )
    if is_mask:
        return np.flatnonzero(split.numpy()).astype(np.int64)
    return np.array(split.long().numpy(), dtype=np.int64)


# Use the nodes the splits actually select.  Previously only the split sizes
# were read and the three sets were assumed to be the consecutive id ranges
# [0, train), [train, train+val), [train+val, train+val+test).  That is only
# right when the loader orders nodes that way; with the public Planetoid split,
# for example, the test nodes do not directly follow the validation nodes, so
# evaluation ran on the wrong nodes.  When the splits are consecutive from 0
# the arrays below are identical to the old ones.
# NOTE(review): assumes idx_train / idx_val / idx_test refer to the same node
# ordering as A and labels -- confirm in load_citation_data.
train = _split_node_ids(idx_train, A.shape[0])
val = _split_node_ids(idx_val, A.shape[0])
test = _split_node_ids(idx_test, A.shape[0])

# Sizes are kept under their original names for the cells that read them.
train_size, val_size, test_size = len(train), len(val), len(test)

# Adam over every trainable parameter of the model, with L2 weight decay.
optimizer = torch.optim.Adam(
    graphsage.parameters(),
    lr=args.lr,
    weight_decay=args.weight_decay,
)

# Full-batch training: each of the args.epochs steps uses all training nodes.
# `times` records the wall-clock duration of every optimisation step.
times = []
for epoch in range(args.epochs):
    # Slicing a NumPy array gives a view, so the in-place shuffle of `train`
    # on the next line also reorders `batch_nodes`.
    batch_nodes = train[:train_size]
    random.shuffle(train)

    start_time = time.time()
    optimizer.zero_grad()
    # Labels of the batch as a LongTensor (the Variable wrapper is a no-op on
    # current PyTorch and has been dropped).
    batch_labels = torch.LongTensor(labels[np.array(batch_nodes)])
    loss = graphsage.loss(batch_nodes, full_nodes, batch_labels)
    loss.backward()
    optimizer.step()
    end_time = time.time()

    times.append(end_time - start_time)

def _micro_f1_percent(node_ids):
    """Score `node_ids` with the trained model.

    Returns a tuple (model output for the nodes, micro-averaged F1 in percent).
    The forward pass runs under torch.no_grad(): evaluation needs no gradients,
    so no autograd graph is built or kept alive.
    """
    with torch.no_grad():
        scores = graphsage.forward(node_ids, full_nodes)
    # Predicted class = position of the largest score in each row.
    predicted = scores.detach().numpy().argmax(axis=1)
    return scores, 100*f1_score(labels[node_ids], predicted, average="micro")


# NOTE(review): if the encoders contain dropout or similar train-only layers,
# graphsage.eval() should be called before scoring -- cannot tell from this
# notebook.
val_output, val_f1 = _micro_f1_percent(val)
print("Validation Accuracy:", val_f1)

test_output, test_f1 = _micro_f1_percent(test)
print("Test Accuracy:", test_f1)

  init.xavier_uniform(self.weight)
  init.xavier_uniform(self.weight)
