In [1]:
# Project-local imports stay first: the wildcard import from utils must precede
# the explicit imports so that they take precedence over any same-named re-exports.
from model import GCN
from utils import *

import pickle as pkl
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import confusion_matrix

In [2]:
# Unpickle the contents of 'space.labels' into labels_num.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only run this cell on a file from a trusted source.
with open('space.labels', 'rb') as f:
    labels_num = pkl.load(f)

#### Import & preprocess dataset

In [3]:
# Dataset identifier and the list of names passed to load_data in the next cell.
# NOTE(review): the entries presumably select the graph, feature and label
# files of the dataset — confirm against utils.load_data.
dataset = 'space'
names = ['graph.jac.0.85', 'feature', 'labels']

In [4]:
adj, feature, labels = load_data(dataset, names)
# adj and feature are already preprocessed by load_data when the data is loaded.

#### Settings

In [6]:
# Hyperparameters for the experiments run by gcn_iter.
epochs = 200            # optimisation steps per experiment
learning_rate = 0.05    # passed to the Adam optimizer as lr
weight_decay = 5e-4     # passed to the Adam optimizer as weight_decay
num_hidden = 32         # passed to GCN as nhid
dropout = 0.3           # passed to GCN as dropout
train_ratio = 0.8       # fraction of all samples used for training
# The factor 0. makes val_ratio 0.0, so no validation samples are drawn.
val_ratio = train_ratio * 0. # len_val = len_train * val_ratio
train_val_ratio = [train_ratio, val_ratio]  # [train fraction, val fraction relative to the train size]
n_iter = 10             # number of independent experiments (fresh split + fresh model each)

In [7]:
# Bundle the settings into one tuple; gcn_iter unpacks it positionally, so the
# order of the elements here must match the order of that unpacking.
hparams = epochs, learning_rate, weight_decay, num_hidden, dropout, train_val_ratio

In [8]:
# Pick the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

device(type='cuda', index=0)

In [9]:
def gcn_iter(adj, feature, labels, hparams, n_iter=1):
    """Train and evaluate a freshly initialised GCN on `n_iter` random splits.

    Each experiment draws a new train/val/test split with `split`, builds a new
    `GCN`, trains it full-batch with Adam for `epochs` steps on the training
    indices, and then measures accuracy and NLL loss on the test indices.

    Args:
        adj: Graph input forwarded unchanged to the model as its second argument.
        feature: Node features. `len(feature)` is taken as the number of
            samples and `feature.shape[1]` as the model's input size.
        labels: Class labels indexed by the split indices; the number of
            classes is `labels.max().item() + 1`.
        hparams: Sequence `(epochs, learning_rate, weight_decay, num_hidden,
            dropout, train_val_ratio)`. `train_val_ratio[0]` is the fraction of
            all samples used for training; `train_val_ratio[1]` is the size of
            the validation set relative to the training set.
        n_iter: Number of independent experiments to run.

    Returns:
        Tuple `(acc_results, loss_results)`: two lists of Python floats holding
        the test accuracy and test loss of every experiment, in run order.
    """
    epochs, learning_rate, weight_decay, num_hidden, dropout, train_val_ratio = hparams
    n_samples = len(feature)
    len_train = int(n_samples * train_val_ratio[0])
    len_val = int(len_train * train_val_ratio[1])
    len_test = n_samples - (len_train + len_val)
    log_every = 20  # progress is reported once every `log_every` epochs

    acc_results = []
    loss_results = []
    start = time.time()

    for i in range(n_iter):
        idx_train, idx_val, idx_test = split(n_samples, len_train, len_val, len_test)
        model = GCN(nfeat=feature.shape[1], nhid=num_hidden,
                    nclass=labels.max().item() + 1, dropout=dropout)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

        for epoch in range(epochs):
            model.train()
            optimizer.zero_grad()
            output = model(feature, adj)
            train_loss = F.nll_loss(output[idx_train], labels[idx_train])
            train_loss.backward()
            optimizer.step()

            # Validation is only reported on logging epochs, so the extra
            # forward pass is skipped on all other epochs.
            if epoch % log_every != 0:
                continue

            if len_val > 0:
                model.eval()
                # Evaluation needs no gradients; no_grad avoids building the autograd graph.
                with torch.no_grad():
                    output = model(feature, adj)
                    val_loss = F.nll_loss(output[idx_val], labels[idx_val])
                    val_acc = accuracy(output[idx_val], labels[idx_val])
                print('Experiment {} - {}/{} val_acc: {:.4f} val_loss: {:.4f}'.format(
                      i+1, epoch+1, epochs, val_acc, val_loss), end='\r', flush=False)
            else:
                # Without validation samples the validation metrics would be
                # computed on an empty index set; report the training loss instead.
                print('Experiment {} - {}/{} train_loss: {:.4f}'.format(
                      i+1, epoch+1, epochs, train_loss.item()), end='\r', flush=False)

        model.eval()
        with torch.no_grad():
            output = model(feature, adj)
            test_acc = accuracy(output[idx_test], labels[idx_test])
            test_loss = F.nll_loss(output[idx_test], labels[idx_test])
        # float() extracts the scalar regardless of the device the tensor lives on.
        acc_results.append(float(test_acc))
        loss_results.append(float(test_loss))

        print('Experiment {} result - test_acc: {:.4f}, test_loss: {:.4f}'.format(i+1, test_acc, test_loss), end='\n')

    print('All experiments finished!', '\nElapsed time: {:.2f}s'.format(time.time()-start), end='\n')
    return acc_results, loss_results

In [10]:
# Run all experiments, then report the test accuracy as "mean ± std" in percent.
results = gcn_iter(adj, feature, labels, hparams, n_iter=n_iter)
print('{:.2f} ± {:.2f}'.format(
    *(np.round(stat(results[0]) * 100, 2) for stat in (np.mean, np.std))
))

Experiment 1 result - test_acc: 0.9400, test_loss: 0.3965
Experiment 2 result - test_acc: 0.9400, test_loss: 0.6683
Experiment 3 result - test_acc: 0.8400, test_loss: 1.1855
Experiment 4 result - test_acc: 0.9400, test_loss: 0.2280
Experiment 5 result - test_acc: 0.9200, test_loss: 0.8414
Experiment 6 result - test_acc: 0.9200, test_loss: 0.3463
Experiment 7 result - test_acc: 0.8800, test_loss: 0.5438
Experiment 8 result - test_acc: 0.8800, test_loss: 0.8657
Experiment 9 result - test_acc: 0.9200, test_loss: 0.4139
Experiment 10 result - test_acc: 0.8600, test_loss: 0.6452
All experiments finished! 
Elapsed time: 7.15s
90.40 ± 3.44


In [34]:
# print(np.round(np.mean(results[0])*100, 2),'±',np.round(np.std(results[0])*100, 2))