In [1]:
# Make Google Drive reachable from this runtime; everything below reads from /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:

# Include this at the top of any notebook: it registers the directories from which
# shared libraries and this project's modules are imported, then switches the
# working directory to the project folder.
import os
import sys

libraries_dir = "/content/drive/MyDrive/libraries"
base_dir = "/content/drive/My Drive/Colab Notebooks/projects/ogbn_arxiv_dgl"

# Append each directory only once so re-running the cell does not grow sys.path.
for _import_dir in (libraries_dir, base_dir):
  if _import_dir not in sys.path:
    sys.path.append(_import_dir)

os.chdir(base_dir)

In [3]:
import torch
from torch import nn
import numpy as np

from data_factory import *
from model_factory import *
from evaluate import *

Using backend: pytorch


In [4]:
# Load ogbn-arxiv dataset
# Store graph onto the first GPU when CUDA is available; otherwise fall back to the
# CPU so the notebook still runs on a runtime that has no GPU attached
# Save dataset in directory root
dataset_name = "ogbn-arxiv"
device = "cuda:0" if torch.cuda.is_available() else "cpu"
root = "dataset/"
data_factory = DataFactory(dataset_name, device, root)

In [5]:
# ---- Model hyperparameters ----
d_input = 128
d_hidden = 256
n_class = 40
d_output = n_class  # output width equals the number of classes
n_layer = 3
n_head = 3
masked = False # Establish whether or not to include masked label features

# Build the GAT model, then move it to the target device
model = GAT(d_input, d_output, d_hidden, n_layer, n_head, masked)
model = model.to(device)
model.masked = masked

# Register the model name and directory so that model_factory will save the best model to "models_dir/name" during training
model_factory = ModelFactory(model, models_dir="saved_models", name="GATNoMasked")

# Register the training loss, valid metric, and test metric (with the direction in
# which each one improves) so that model_factory will save their values to disk during training
for _loss_name, _loss_mode in (("train", "min"), ("valid", "max"), ("test", "max")):
  model_factory.add_loss_name(_loss_name, mode=_loss_mode)

# Report the number of model parameters
model_factory.print_num_params()

Number of parameters (total): 1443960
Number of parameters (requires grad): 1443960
Number of parameters (no grad): 0


In [6]:
# ---- Training hyperparameters ----
lr = 0.005 # learning rate
criterion = CrossEntropyLossSmooth # training loss
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# Cut the learning rate by 10x once the monitored quantity has not decreased for 500 steps
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
  optimizer,
  mode="min",
  factor=0.1,
  patience=500,
  verbose=True,
)

# Hand the optimizer and scheduler to model_factory so that it will save their state_dict to disk during training
model_factory.set_optimizer(optimizer)
model_factory.set_scheduler(scheduler)

In [7]:
epochs = 2000
log_every = 20

for epoch in range(epochs):
  # One pass of training; keep the resulting training loss in memory
  loss_train = train(model_factory, data_factory, criterion)
  model_factory.append_loss("train", loss_train)

  # Score the validation split and keep the result in memory
  valid_score = evaluate(model_factory, data_factory, dataset_name, split_name="valid")
  model_factory.append_loss("valid", valid_score)

  # Score the test split and keep the result in memory
  test_score = evaluate(model_factory, data_factory, dataset_name, split_name="test")
  model_factory.append_loss("test", test_score)

  # After each epoch, store the training loss, validation score, test score, optimizer state_dict, and scheduler state_dict to disk
  # If validation score is best, then also save model state_dict to disk
  model_factory.save_best("valid")

  # Only every `log_every`-th epoch is reported; skip the rest
  if epoch % log_every != 0:
    continue
  # Print results from current epoch
  model_factory.print_last_loss(epoch)



epoch: 0 train 0.963896 valid 0.360012 test 0.359813 
epoch: 20 train 0.344041 valid 0.651230 test 0.658149 
epoch: 40 train 0.301368 valid 0.695325 test 0.694443 
epoch: 60 train 0.279452 valid 0.713984 test 0.705450 
epoch: 80 train 0.267231 valid 0.719890 test 0.709606 
epoch: 100 train 0.258905 valid 0.722105 test 0.709113 
epoch: 120 train 0.252202 valid 0.724286 test 0.705245 
epoch: 140 train 0.246985 valid 0.727810 test 0.711911 
epoch: 160 train 0.241945 valid 0.729051 test 0.713639 
epoch: 180 train 0.237439 valid 0.723112 test 0.699072 
epoch: 200 train 0.233442 valid 0.729722 test 0.714174 
epoch: 220 train 0.231104 valid 0.731467 test 0.714503 
epoch: 240 train 0.228228 valid 0.729152 test 0.708125 
epoch: 260 train 0.225132 valid 0.731736 test 0.714112 
epoch: 280 train 0.222360 valid 0.738078 test 0.721046 
epoch: 300 train 0.220258 valid 0.734085 test 0.713207 
epoch: 320 train 0.218966 valid 0.731467 test 0.710717 
epoch: 340 train 0.217399 valid 0.738112 test 0.724317