In [1]:
# Mount Google Drive inside the Colab VM so the project files under
# /content/drive are reachable by the cells below.
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:

# Path bootstrap: put this cell at the top of any notebook so that shared
# libraries and this project's own modules can be imported.
import os
import sys

# Shared library folder, and the project folder that holds the local modules
# imported further down.
libraries_dir = "/content/drive/MyDrive/libraries"
base_dir = "/content/drive/My Drive/Colab Notebooks/projects/ogbn_arxiv_dgl"

# Register each folder once, so re-running the cell does not stack duplicates.
for _import_dir in (libraries_dir, base_dir):
  if _import_dir not in sys.path:
    sys.path.append(_import_dir)

# Work from the project folder so relative paths used later resolve against it.
os.chdir(base_dir)

In [3]:
import torch
from torch import nn
import numpy as np

from data_factory import *
from model_factory import *
from evaluate import *

Using backend: pytorch


In [4]:
# Load the ogbn-arxiv dataset through the project's DataFactory.
dataset_name = "ogbn-arxiv"

# Device the graph (and later the model) is placed on. Prefer the first CUDA
# GPU, but fall back to the CPU so the notebook still runs on a runtime that
# has no GPU attached instead of failing on a hard-coded "cuda:0".
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Directory handed to DataFactory as the dataset root. It is a relative path,
# so it is presumably resolved against the working directory set by the
# os.chdir call in the path-setup cell -- confirm in DataFactory.
root = "dataset/"

data_factory = DataFactory(dataset_name, device, root)

In [5]:
# Seed the torch and numpy generators before the model is built, so that any
# random parameter initialisation and later sampling start from the same state
# on every "Restart & Run All". GPU kernels can still be non-deterministic, so
# this narrows run-to-run variation rather than guaranteeing identical numbers.
random_seed = 0
torch.manual_seed(random_seed)
np.random.seed(random_seed)

# Model hyperparameters
d_input = 128+40 # presumably 128 node-feature dims + 40 label-feature dims -- TODO confirm against GCN
d_hidden = 256
d_output = n_class = 40
n_layer = 3
n_head = 1
masked = True # Establish whether or not to include masked label features

# Initialize an instance of the model and move it to the chosen device
model = GCN(d_input, d_output, d_hidden, n_layer, n_head, masked).to(device)
# The flag is also set as an attribute after construction; whether GCN already
# stores it from the constructor argument is not visible here.
model.masked = masked
# Register the model name and directory so that model_factory will save the best model to "models_dir/name" during training
model_factory = ModelFactory(model, models_dir="saved_models", name="GCNMasked")

# This registers the training loss, valid metric, and test metric so that model_factory will save their values to disk during training
# "min" / "max" state which direction counts as better for each tracked value.
model_factory.add_loss_name("train", mode="min")
model_factory.add_loss_name("valid", mode="max")
# NOTE(review): with mode="max" the logged "best test" looks like the maximum
# test score over all epochs, not the test score at the best validation epoch
# -- confirm before quoting it as a result.
model_factory.add_loss_name("test", mode="max")

# Print number of model parameters
model_factory.print_num_params()

Number of parameters (total): 449832
Number of parameters (requires grad): 449832
Number of parameters (no grad): 0


In [6]:
# Optimisation settings
lr = 0.005  # learning rate passed to Adam
criterion = CrossEntropyLossSmooth  # training loss; passed on as-is, not called here

optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Cut the learning rate by 10x once the monitored quantity has stopped
# decreasing for 500 scheduler steps.
# NOTE(review): no scheduler.step(...) call is visible in this notebook;
# presumably the project helpers step it -- confirm which metric they pass,
# because mode="min" only makes sense for a loss-like value.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
  optimizer,
  mode="min",
  factor=0.1,
  patience=500,
  verbose=True,
)

# Hand both objects to model_factory so that, per the original notes, their
# state_dicts are written to disk during training.
model_factory.set_optimizer(optimizer)
model_factory.set_scheduler(scheduler)

In [7]:
epochs = 2000    # number of iterations of the loop below
log_every = 20   # print a progress line every `log_every` epochs


def _run_epoch():
  """Run one train / validate / test cycle and record each value in model_factory.

  Returns:
    A (train loss, validation score, test score) tuple for the epoch.
  """
  # Train for one epoch and keep the resulting loss in memory
  epoch_loss = train(model_factory, data_factory, criterion)
  model_factory.append_loss("train", epoch_loss)

  # Score the validation split and keep the result in memory
  epoch_valid = evaluate(model_factory, data_factory, dataset_name, split_name="valid")
  model_factory.append_loss("valid", epoch_valid)

  # Score the test split and keep the result in memory
  epoch_test = evaluate(model_factory, data_factory, dataset_name, split_name="test")
  model_factory.append_loss("test", epoch_test)

  return epoch_loss, epoch_valid, epoch_test


for epoch in range(epochs):
  loss_train, valid_score, test_score = _run_epoch()

  # Per the original notes: this persists the recorded losses/scores plus the
  # optimizer and scheduler state_dicts after every epoch, and also saves the
  # model state_dict when the validation score is the best so far.
  model_factory.save_best("valid")

  # Periodic progress report for the current epoch
  if epoch % log_every == 0:
    model_factory.print_last_loss(epoch)



epoch: 0 train 0.791914 valid 0.325615 test 0.291793 
best train 0.791914 best valid 0.325615 best test 0.291793  
epoch: 20 train 0.347259 valid 0.646263 test 0.634693 
best train 0.347259 best valid 0.646263 best test 0.634693  
epoch: 40 train 0.286574 valid 0.701332 test 0.680226 
best train 0.285883 best valid 0.701769 best test 0.687365  
epoch: 60 train 0.261968 valid 0.716702 test 0.691933 
best train 0.261968 best valid 0.718178 best test 0.705018  
epoch: 80 train 0.253079 valid 0.720830 test 0.697940 
best train 0.251838 best valid 0.727843 best test 0.712549  
epoch: 100 train 0.244770 valid 0.729286 test 0.711993 
best train 0.244770 best valid 0.735595 best test 0.721992  
epoch: 120 train 0.243000 valid 0.732911 test 0.716252 
best train 0.241510 best valid 0.735964 best test 0.723782  
epoch: 140 train 0.238099 valid 0.732206 test 0.708228 
best train 0.238041 best valid 0.737441 best test 0.723782  
epoch: 160 train 0.238177 valid 0.735595 test 0.715697 
best train 0.2