In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that later
# cells can read and write files stored on Drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
cd /content/drive/MyDrive

/content/drive/MyDrive


In [None]:
# Project-local imports stay first: `from gcn.utils import *` is a wildcard import, so the
# explicit imports below must run after it in order to win any name clash.
from gcn.model import GCN
from gcn.utils import *

import json
import time
from collections import OrderedDict
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torch.optim as optim

### Settings

In [None]:
# Experiment-wide settings used by the data-loading, training and logging cells.
model_name = 'gcn' # stored under the 'model' key of every logged result
dataset = 'cora' # options listed by the author: 'citeseer', 'cora', 'pubmed', 'arx'
task = 'classification' # 'classification' or 'link_prediction'; selects the load_data branch
feat_norm = False # forwarded to load_data and recorded with the logged settings
n_iter = 10 # number of repeated train/test experiments per (n_diff, alpha) pair

In [None]:
# Training hyperparameters; the sweep cell packs these into the `hparams` tuple for gcn_iter.
epochs = 200
learning_rate = 0.01
weight_decay = 5e-4
num_hidden = 32
dropout = 0.5
train_ratio = 0.5 # fraction of all nodes used for training
val_ratio = train_ratio * 0.1 # fraction of the *training* size: len_val = int(len_train * val_ratio)
train_val_ratio = [train_ratio, val_ratio]
# Grid for the simplified-diffusion sweep: every (n_diff, alpha) combination is run.
n_diff_range = [1,2,3,4,5]
alpha_range = [0.1, 0.3, 0.5, 0.7, 0.9]

In [None]:
# n_diff = 3
# alpha = 0.5

In [None]:
# hparams = epochs, learning_rate, weight_decay, num_hidden, dropout, train_val_ratio, n_diff, alpha

### Import dataset

In [None]:
# Load the graph for the selected task. Only the classification task returns labels;
# link prediction returns just the adjacency and the features.
if task == 'link_prediction':
    adj, feature = load_data(dataset, task, feat_norm)
elif task == 'classification':
    adj, feature, labels = load_data(dataset, task, feat_norm)
else:
    # Fail loudly instead of silently leaving adj/feature undefined (or stale from an
    # earlier run of this cell) when the task name is misspelled.
    raise ValueError(
        "Unsupported task {!r}; expected 'classification' or 'link_prediction'".format(task)
    )

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# device = xm.xla_device()
device

device(type='cuda')

### GCN (iterative operation)

In [None]:
# Grid sweep over the simplified-diffusion parameters: each (n_diff, alpha) pair is
# trained n_iter times and one summary record per pair is appended to the JSON log.
# NOTE: gcn_iter is defined in the "Define GCN" cell further down the notebook; that
# cell must be executed before this one.
results_path = 'results_classification.json'

for n_diff in n_diff_range:
    for alpha in alpha_range:

        hparams = epochs, learning_rate, weight_decay, num_hidden, dropout, train_val_ratio, n_diff, alpha

        # results == (per-run test accuracies, per-run test losses, total elapsed seconds)
        results = gcn_iter(adj, feature, labels, hparams, n_iter=n_iter)

        # Timestamps are shifted by +9 hours from the runtime clock
        # (presumably UTC -> KST; confirm against the runtime's timezone).
        date = (datetime.now() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
        # n_diff and alpha are reported through experiment_mode, so they are dropped
        # from the logged settings and feat_norm is appended instead.
        hparams_add = hparams[:-2] + (feat_norm,)
        if n_diff == 1 and alpha == 0.5:
            experiment_mode = 'normal mode(n_diff={}, alpha={})'.format(n_diff, alpha)
        else:
            experiment_mode = 'simplified diffusion mode(n_diff={}, alpha={})'.format(n_diff, alpha)

        log_dict= OrderedDict()
        log_dict['model'] = model_name
        log_dict['dataset'] = dataset
        log_dict['datetime'] = date
        log_dict['simplified diffusion'] =  experiment_mode
        log_dict['setting_order'] = ['epochs', 'lr', 'weight_decay', 'num_hidden', 'dropout', 'train_val_ratio', 'feat_norm']
        log_dict['setting_value'] = hparams_add
        log_dict['iteration'] = n_iter
        log_dict['acc'] = results[0]
        log_dict['acc_mean'] = np.mean(results[0])
        log_dict['acc_std'] = np.std(results[0])
        log_dict['loss'] = results[1]
        log_dict['loss_mean'] = np.mean(results[1])
        log_dict['loss_std'] = np.std(results[1])
        log_dict['elapsed time(s)'] = np.round(results[2], 4)
        print(pd.DataFrame(log_dict.items(), columns=['key', 'value']))

        # Read the existing log through a context manager so the handle is closed, and
        # start a fresh log when the file does not exist yet rather than losing the
        # results of a finished training run to a FileNotFoundError.
        try:
            with open(results_path) as f:
                data = json.load(f)
        except FileNotFoundError:
            data = []
        data.append(log_dict)
        with open(results_path, 'w') as f:
            json.dump(data, f)
        print("Last data saved at: {}".format((datetime.now() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")))
        print("Total data num: {}".format(len(data)))

Dataset: cora
Simdiff_params: n_diff =  1, alpha = 0.1
Start time: 2021-12-26 22:26:16
Experiment 1 - 1/200 val_acc: 0.2985 val_loss: 1.8507
Experiment 1 - 51/200 val_acc: 0.7910 val_loss: 0.7804
Experiment 1 - 101/200 val_acc: 0.7761 val_loss: 0.8074
Experiment 1 - 151/200 val_acc: 0.7761 val_loss: 0.8033
Experiment 1 result - test_acc: 0.8376, test_loss: 0.7747
Experiment 2 - 1/200 val_acc: 0.2985 val_loss: 1.9603
Experiment 2 - 51/200 val_acc: 0.8209 val_loss: 0.7408
Experiment 2 - 101/200 val_acc: 0.8060 val_loss: 0.9080
Experiment 2 - 151/200 val_acc: 0.7910 val_loss: 0.9404
Experiment 2 result - test_acc: 0.8314, test_loss: 0.7990
Experiment 3 - 1/200 val_acc: 0.2090 val_loss: 1.9480
Experiment 3 - 51/200 val_acc: 0.7910 val_loss: 0.8746
Experiment 3 - 101/200 val_acc: 0.7910 val_loss: 0.9083
Experiment 3 - 151/200 val_acc: 0.7910 val_loss: 0.9086
Experiment 3 result - test_acc: 0.8267, test_loss: 0.7343
Experiment 4 - 1/200 val_acc: 0.2687 val_loss: 1.7838
Experiment 4 - 51/200 

### GCN (single operation)

In [None]:
# results = gcn_iter(adj, feature, labels, hparams, n_iter=n_iter)

### Save log

In [None]:
# date = (datetime.now() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")
# hparams_add = hparams[:-2] + (feat_norm,)
# if n_diff == 1 and alpha == 0.5:
#     experiment_mode = 'normal mode(n_diff={}, alpha={})'.format(n_diff, alpha)
# else:
#     experiment_mode = 'simplified diffusion mode(n_diff={}, alpha={})'.format(n_diff, alpha)

In [None]:
# log_dict= OrderedDict()

In [None]:
# log_dict['model'] = model_name
# log_dict['dataset'] = dataset
# log_dict['date'] = date
# log_dict['simplified diffusion'] =  experiment_mode
# log_dict['setting_order'] = ['epochs', 'lr', 'weight_decay', 'num_hidden', 'dropout', 'train_val_ratio', 'feat_norm']
# log_dict['setting_value'] = hparams_add
# log_dict['iteration'] = n_iter
# log_dict['acc'] = results[0]
# log_dict['acc_mean'] = np.mean(results[0])
# log_dict['acc_std'] = np.std(results[0])
# log_dict['loss'] = results[1]
# log_dict['loss_mean'] = np.mean(results[1])
# log_dict['loss_std'] = np.std(results[1])
# log_dict['elapsed time(s)'] = np.round(results[2], 4)
# log_dict

In [None]:
# data = json.load(open('results_classification.json'))
# data.append(log_dict)

In [None]:
# with open('results_classification.json', 'w') as f:
#     json.dump(data, f)

### Define GCN

In [None]:
def gcn_iter(adj, feature, labels, hparams, n_iter=1):
    """Train and test a GCN `n_iter` times and collect the test metrics of every run.

    Each run draws a new train/val/test split via `split`, builds a fresh `GCN` and
    Adam optimizer, trains for `epochs` epochs with NLL loss, and evaluates on the
    test indices. Progress is printed every 50 epochs.

    Besides its arguments, the function reads two notebook-level globals: `dataset`
    (only for the printed header) and `device` (where tensors and the model are placed).

    Args:
        adj: Graph adjacency, handed to `preprocess_graph_diff(adj, n_diff, alpha)`;
            the preprocessed result must support `.to(device)`.
        feature: Node feature tensor; `len(feature)` is used as the number of nodes
            and `feature.shape[1]` as the input width of the model.
        labels: Label tensor; `labels.max() + 1` is used as the number of classes.
        hparams: 8-tuple `(epochs, learning_rate, weight_decay, num_hidden, dropout,
            train_val_ratio, n_diff, alpha)`. `train_val_ratio[0]` is the fraction of
            all nodes used for training; `train_val_ratio[1]` is the validation size
            as a fraction of the *training* size.
        n_iter: Number of independent runs.

    Returns:
        Tuple `(acc_results, loss_results, elapsed_time)`: two lists of Python floats
        with one test accuracy / test loss per run, and the total wall-clock time in
        seconds.
    """
    epochs, learning_rate, weight_decay, num_hidden, dropout, train_val_ratio, n_diff, alpha = hparams
    len_train = int(len(feature) * train_val_ratio[0])
    len_val = int(len_train * train_val_ratio[1])
    # Everything not used for training or validation becomes the test set.
    len_test = len(feature) - (len_train + len_val)

    acc_results = []
    loss_results = []
    start = time.time()
    print("Dataset: {}".format(dataset))
    print("Simdiff_params: n_diff =  {}, alpha = {}".format(n_diff, alpha))
    print("Start time: {}".format((datetime.now() + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M:%S")))

    adj = preprocess_graph_diff(adj, n_diff, alpha)

    # Loop-invariant work: move the data to the device and derive the class count once
    # instead of repeating both in every run.
    adj, feature, labels = adj.to(device), feature.to(device), labels.to(device)
    num_classes = labels.max().item() + 1

    for i in range(n_iter):
        idx_train, idx_val, idx_test = split(len(feature), len_train, len_val, len_test)
        model = GCN(nfeat=feature.shape[1], nhid=num_hidden, nclass=num_classes, dropout=dropout)
        model = model.to(device)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

        for epoch in range(epochs):
            model.train()
            optimizer.zero_grad()
            output = model(feature, adj)
            # nll_loss expects log-probabilities (presumably GCN ends in log_softmax; confirm).
            train_loss = F.nll_loss(output[idx_train], labels[idx_train])
            train_loss.backward()
            optimizer.step()

            # Validation is inference only, so skip building an autograd graph.
            model.eval()
            with torch.no_grad():
                output = model(feature, adj)
                val_loss = F.nll_loss(output[idx_val], labels[idx_val])
                val_acc = accuracy(output[idx_val], labels[idx_val])

            if epoch % 50 == 0:
                print('Experiment {} - {}/{} val_acc: {:.4f} val_loss: {:.4f}'.format(
                    i+1, epoch+1, epochs, val_acc, val_loss))

        # Final evaluation on the held-out test indices, again without gradient tracking.
        model.eval()
        with torch.no_grad():
            output = model(feature, adj)
            test_acc = accuracy(output[idx_test], labels[idx_test]).item()
            test_loss = F.nll_loss(output[idx_test], labels[idx_test]).item()
        acc_results.append(test_acc)
        loss_results.append(test_loss)

        print('Experiment {} result - test_acc: {:.4f}, test_loss: {:.4f}'.format(i+1, test_acc, test_loss))
    elapsed_time = time.time() - start
    print('All experiments finished!', '\nElapsed time: {:.2f}s'.format(elapsed_time))
    return acc_results, loss_results, elapsed_time

In [None]:
# # # if wanna clear results
# empty = []
# with open('results_classification_rename.json', 'w') as f:
#     json.dump(empty, f)