In [1]:
# Colab-only setup: mount the user's Google Drive at /content/drive so that the
# Drive-hosted paths used by the following cells are reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:

# Put this cell at the top of any notebook: it registers the directories that
# libraries and project modules are imported from, then enters the project folder.
import os
import sys

libraries_dir = "/content/drive/MyDrive/libraries"
base_dir = "/content/drive/My Drive/Colab Notebooks/projects/ogbn_arxiv_dgl"

# Append each directory to the import path only if it is missing, so that
# re-running the cell does not accumulate duplicate entries.
for import_root in (libraries_dir, base_dir):
  if import_root not in sys.path:
    sys.path.append(import_root)

# Make the project folder the working directory; the relative paths used by
# later cells ("dataset/", "saved_models") resolve against it.
os.chdir(base_dir)

In [3]:
import torch
from torch import nn
import numpy as np

from data_factory import *
from model_factory import *
from evaluate import *

Using backend: pytorch


In [4]:
# Load the ogbn-arxiv dataset through the project's DataFactory.
# Per the original notes, the graph is stored on `device` and the dataset is
# saved under the directory `root`.
dataset_name = "ogbn-arxiv"
# Prefer the first GPU, but fall back to the CPU so the notebook still runs on
# a runtime without CUDA instead of failing when tensors are moved to "cuda:0".
device = "cuda:0" if torch.cuda.is_available() else "cpu"
root = "dataset/"  # relative to the current working directory
data_factory = DataFactory(dataset_name, device, root)

In [5]:
# --- Model hyperparameters ---
d_input, d_output = 128, 40
masked = False  # whether or not to include masked label features

# Build the linear model, move it to the target device, and record the
# masking choice as an attribute on the model
model = LinearModel(d_input, d_output).to(device)
model.masked = masked

# Wrap the model in a ModelFactory. Per the original note, registering the model
# name and directory lets model_factory save the best model to "models_dir/name"
# during training.
model_factory = ModelFactory(model, models_dir="saved_models", name="Linear")

# Register the training loss, valid metric, and test metric (in that order) so
# that, per the original note, model_factory saves their values to disk during
# training. The loss is tracked with mode "min", the two metrics with mode "max".
for tracked_name, tracked_mode in (("train", "min"), ("valid", "max"), ("test", "max")):
  model_factory.add_loss_name(tracked_name, mode=tracked_mode)

# Report the number of model parameters
model_factory.print_num_params()

Number of parameters (total): 5160
Number of parameters (requires grad): 5160
Number of parameters (no grad): 0


In [6]:
# --- Training hyperparameters ---
lr = 0.005  # learning rate
criterion = CrossEntropyLossSmooth  # training loss (passed on to train() as-is)

# Adam over all model parameters
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Cut the learning rate by 10x once the monitored quantity has stopped
# decreasing for 500 scheduler steps.
# NOTE(review): scheduler.step(...) is not called anywhere in this notebook;
# presumably train() or model_factory does it -- verify which metric is passed.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases -- confirm
# against the installed version before upgrading.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.1,
    patience=500,
    verbose=True,
)

# Hand both objects to model_factory; per the original note, this lets it save
# their state_dict to disk during training
model_factory.set_optimizer(optimizer)
model_factory.set_scheduler(scheduler)

In [7]:
# Main training loop: for every epoch, train once, score the validation and
# test splits, record all three values, checkpoint, and log periodically.
epochs = 2000
log_every = 20

for epoch in range(epochs):
  # Train for one epoch
  loss_train = train(model_factory, data_factory, criterion)
  # Store last training loss to memory
  model_factory.append_loss("train", loss_train)

  # Validate
  valid_score = evaluate(model_factory, data_factory, dataset_name, split_name="valid")
  # Store last validation score to memory
  model_factory.append_loss("valid", valid_score)

  # Test (recorded for reporting only; model selection below uses "valid")
  test_score = evaluate(model_factory, data_factory, dataset_name, split_name="test")
  # Store last test score to memory
  model_factory.append_loss("test", test_score)

  # After each epoch, store the training loss, validation score, test score, optimizer state_dict, and scheduler state_dict to disk
  # If validation score is best, then also save model state_dict to disk
  model_factory.save_best("valid")

  # Log results periodically, and always on the final epoch: epochs - 1 is
  # generally not a multiple of log_every, so without the second condition the
  # end-of-run metrics would never be printed.
  if epoch % log_every == 0 or epoch == epochs - 1:
    # Print results from current epoch
    model_factory.print_last_loss(epoch)



epoch: 0 train 0.777956 valid 0.210208 test 0.194597 
epoch: 20 train 0.692488 valid 0.271452 test 0.244306 
epoch: 40 train 0.652286 valid 0.129501 test 0.105961 
epoch: 60 train 0.621839 valid 0.273466 test 0.242207 
epoch: 80 train 0.596028 valid 0.318736 test 0.287945 
epoch: 100 train 0.574045 valid 0.367764 test 0.337222 
epoch: 120 train 0.555298 valid 0.403034 test 0.375347 
epoch: 140 train 0.539296 valid 0.431659 test 0.403452 
epoch: 160 train 0.525615 valid 0.450820 test 0.424398 
epoch: 180 train 0.513899 valid 0.464378 test 0.439952 
epoch: 200 train 0.503841 valid 0.474445 test 0.451968 
epoch: 220 train 0.495171 valid 0.484848 test 0.461864 
epoch: 240 train 0.487651 valid 0.491325 test 0.468695 
epoch: 260 train 0.481080 valid 0.496862 test 0.475115 
epoch: 280 train 0.475294 valid 0.501628 test 0.480444 
epoch: 300 train 0.470160 valid 0.506057 test 0.484271 
epoch: 320 train 0.465573 valid 0.509514 test 0.487151 
epoch: 340 train 0.461450 valid 0.512802 test 0.490278