In [1]:
import sys
from argparse import ArgumentParser
import numpy as np
import torch
import torch_geometric as tg

from load_dataset import get_LP_dataset, get_NC_dataset, get_GC_dataset
from set_up_training import set_up_LP_training, set_up_NC_training
from prepare_data import prepare_LP_data, prepare_NC_data
from utils import weights_init, seed_everything
from execution import NC_run, LP_run, GC_run
from model import AdamGNN, GAE, InnerProductDecoder

import warnings
# Suppress every Python warning for the rest of the session. This is a blanket
# filter: it also hides deprecation / runtime warnings raised later from the
# libraries imported above, so disable it when debugging unexpected behaviour.
warnings.filterwarnings('ignore')
# %matplotlib inline

In [2]:
def str2bool(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` must not be used with argparse: it calls ``bool()`` on the
    raw string, so every non-empty value (including ``'False'`` and ``'0'``)
    is parsed as ``True``. This converter interprets the text instead.

    Args:
        value: the raw option value (a string from the command line), or an
            actual ``bool`` which is returned unchanged.

    Returns:
        ``True`` for true/t/yes/y/1/on, ``False`` for false/f/no/n/0/off or
        the empty string (case-insensitive, surrounding whitespace ignored).

    Raises:
        ValueError: for any other text; argparse turns it into a usage error.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    # The empty string stays falsy, as it was under the old ``type=bool``.
    if token in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise ValueError('expected a boolean value, got {!r}'.format(value))


# Jupyter starts the kernel with its own command-line arguments; reset argv so
# that parse_args() below only sees the defaults declared in this cell.
sys.argv = ['']  # execution on jupyter notebook
parser = ArgumentParser()
# general
parser.add_argument('--task', dest='task', default='LP', type=str,
                    help='LP; NC; GC')
parser.add_argument('--data_name', dest='data_name', default='emails', type=str,
                    help='cora; emails; ...')
parser.add_argument('--mode', dest='mode', default='baseline', type=str,
                    help='experiment mode. E.g., baseline or basemodel')
parser.add_argument('--model', dest='model', default='GCN', type=str,
                    help='model class name. E.g., GCN, PGNN, HCGNN...')
parser.add_argument('--local_agg_gnn', dest='local_agg_gnn', default='GCN', type=str,
                    help='GNN model used for primary node generation')
# The backslashes are escaped explicitly: '\p' is an invalid escape sequence
# (a warning today, an error in future Python); the help text itself is unchanged.
parser.add_argument('--fitness_mode', dest='fitness_mode', default='both_c', type=str,
                    help='how do we jointly use two fitness scores. E.g., c: \\phi_c s: \\phi_s both_j both_c')
parser.add_argument('--pooling_mode', dest='pooling_mode', default='att', type=str,
                    help='adaptive pooling mode. E.g., mean, max, att')
parser.add_argument('--num_levels', dest='num_levels', default=4, type=int,
                    help='number of hierarchical levels.')
parser.add_argument('--hid_dim', dest='hid_dim', default=64, type=int,
                    help='the hidden dimentin of neural network')
parser.add_argument('--cluster_range', dest='cluster_range', default=1, type=int,
                    help='number of hops to define the ego-network')
parser.add_argument('--overlap', dest='overlap', default=True, type=str2bool,
                    help='whether allow overlapping between different selected ego-networks')
parser.add_argument('--loss_mode', dest='loss_mode', default='all', type=str,
                    help='mode of loss fucntion. E.g., task, KL, R')
parser.add_argument('--batch_size', dest='batch_size', default=64, type=int,
                    help='batch_size')
parser.add_argument('--use_attribute', dest='use_attribute', default=True, type=str2bool,
                    help='wheter adopt attributes of dataset')
parser.add_argument('--output_mode', dest='output_mode', default='ATT', type=str,
                    help='the mode of flyback aggregator. E.g., ATT, MEAN, MAC, LSTM')
parser.add_argument('--gat_head', dest='gat_head', default=0, type=int,
                    help='the number of attention head if use GAT for primary node embedding generation')
parser.add_argument('--all_cluster', dest='all_cluster', default=True, type=str2bool,
                    help='whether pick all ego-networks that satisfy selection requirements')
parser.add_argument('--pooling_ratio', dest='pooling_ratio', default=1, type=int,
                    help='the ratio of selection if do not select all satisfied ego-networks')
parser.add_argument('--l2_regularization', dest='l2_regularization', default=5e-4, type=float,
                    help='l2 regularization value')
parser.add_argument('--edge_threshold', dest='edge_threshold', default=0, type=float,
                    help='filter weak edges')
parser.add_argument('--do_view', dest='do_view', default=False, type=str2bool,
                    help='whether present detailed training process')
parser.add_argument('--early_stop', dest='early_stop', default=100, type=int,
                    help='patience to wait for training')

parser.add_argument('--gpu', dest='gpu', default=True, type=str2bool,
                    help='whether use gpu')
parser.add_argument('--seed', dest='seed', default=123, type=int)

# dataset
parser.add_argument('--num_train', dest='num_train', default=80, type=float)
parser.add_argument('--use_features', dest='use_features', default=True, type=str2bool,
                    help='whether use node features')

# model
parser.add_argument('--lr', dest='lr', default=1e-2, type=float)
parser.add_argument('--num_epoch', dest='num_epoch', default=201, type=int)
parser.add_argument('--verbose', dest='verbose', default=1, type=int)
parser.add_argument('--relu', dest='relu', default=True, type=str2bool)
parser.add_argument('--dropout', dest='dropout', default=False, type=str2bool)
parser.add_argument('--drop_ratio', dest='drop_ratio', default=0.5, type=float)

# With argv reset above this yields a namespace holding only the defaults;
# non-string defaults (the booleans) are stored as-is, not passed through `type`.
args = parser.parse_args()

In [3]:
# Notebook-level overrides layered on top of the parsed defaults. Keyword order
# mirrors the order in which the attributes should be (re)assigned.
vars(args).update(
    data_name='karate',      # emails cora citeseer pubmed cs phisic computers photo karate
    num_train=1,
    local_agg_gnn='GCN',     # GCN SAGE GIN
    fitness_mode='both_c',   # c: connect s: structure both_j both_c
    pooling_mode='att',      # 'mean' mean max att
    num_levels=2,            # 1, 2, 3, 4
    encoder_layers=1,        # not declared on the parser; only set here
    hid_dim=64,
    overlap=True,
    cluster_range=1,         # 1
    drop_ratio=0.5,          # 0.9
    loss_mode='all',         # nc recon kl all
    output_mode='ATT',       # ATT GCN GAT MEAN MAX LINEAR LSTM NONE
    seed=4329,
    batch_size=None,
    do_view=True,
)

# Use the first CUDA device only when requested *and* available; otherwise CPU.
args.device = torch.device(
    'cuda:0' if (args.gpu and torch.cuda.is_available()) else 'cpu'
)
seed_everything(args.seed)
print(args)

Namespace(all_cluster=True, batch_size=None, cluster_range=1, data_name='karate', device=device(type='cuda', index=0), do_view=True, drop_ratio=0.5, dropout=False, early_stop=100, edge_threshold=0, encoder_layers=1, fitness_mode='both_c', gat_head=0, gpu=True, hid_dim=64, l2_regularization=0.0005, local_agg_gnn='GCN', loss_mode='all', lr=0.01, mode='baseline', model='GCN', num_epoch=201, num_levels=2, num_train=1, output_mode='ATT', overlap=True, pooling_mode='att', pooling_ratio=1, relu=True, seed=4329, task='LP', use_attribute=True, use_features=True, verbose=1)


In [4]:
# Data pipeline, innermost call first:
#   get_NC_dataset     -> load the dataset named by args.data_name
#   set_up_NC_training -> set up training mode (uses args.num_train)
#   prepare_NC_data    -> prepare data for the chosen device / batch size
data = prepare_NC_data(
    data=set_up_NC_training(
        data=get_NC_dataset(
            dataset_name=args.data_name,
            use_features=args.use_attribute,
            seed=args.seed,
        ),
        num_train=args.num_train,
        seed=args.seed,
    ),
    batch_size=args.batch_size,
    dense=False,
    device=args.device,
    seed=args.seed,
)
# Finally apply torch_geometric's NormalizeFeatures transform to the result.
norm = tg.transforms.NormalizeFeatures()
data = norm(data)

is reading karate dataset...
Train: 4 valid: 4 test: 26


## Experiments

In [5]:
# Build AdamGNN: input width is the number of feature columns in data.x,
# output width is the number of distinct values found in data.y.
model = AdamGNN(
    args=args,
    feat_dim=data.x.size(1),
    out_dim=data.y.unique().size(0),
)
# Release cached GPU memory before moving the model onto the target device.
torch.cuda.empty_cache()
model = model.to(args.device)
print(model)

# Adam with weight decay taken from args.l2_regularization.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=args.lr,
    weight_decay=args.l2_regularization,
)

AdamGNN(
  (encoder): Encoder(
    (convs): ModuleList(
      (0): ModuleList(
        (0): GCNConv(34, 64)
      )
      (1): ModuleList(
        (0): GCNConv(64, 64)
      )
    )
  )
  (pools): ModuleList(
    (0): Adaptive_pooling(
      (score_lin): Linear(in_features=128, out_features=1, bias=True)
      (pool_lin): Linear(in_features=128, out_features=1, bias=True)
    )
  )
  (out_cat): Merge_xs(
    (lin_att): Linear(in_features=128, out_features=1, bias=True)
  )
  (last_layer): Linear(in_features=64, out_features=4, bias=True)
)


In [6]:
# Hand the configured objects to the NC_run entry point; with args.do_view
# enabled above this cell prints a detailed log for every epoch.
NC_run(**{
    'args': args,
    'model': model,
    'optimizer': optimizer,
    'data': data,
})


 Epoch 0 starts !
--------------------------------------------------------------------------------
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Loss values:  1.36139 1.75262 0.04498
Final loss:  1.58164
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Evaluating Epoch 0, time 1.747
train Micro-F1 = 0.5000, valid Micro-F1 = 0.5000, Test Micro-F1 = 0.4615
best valid Micro-F1 is 0.5000, best test is 0.4615 and epoch_id is 0

 Epoch 1 starts !
--------------------------------------------------------------------------------
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining node

Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Evaluating Epoch 12, time 0.063
train Micro-F1 = 1.0000, valid Micro-F1 = 0.5000, Test Micro-F1 = 0.7692
best valid Micro-F1 is 0.7500, best test is 0.6154 and epoch_id is 4

 Epoch 13 starts !
--------------------------------------------------------------------------------
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Loss values:  0.47808 1.28685 0.05215
Final loss:  0.65892
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Evaluating Epoch 13,

Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Loss values:  0.33364 1.33933 0.0495
Final loss:  0.51707
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Evaluating Epoch 24, time 0.065
train Micro-F1 = 1.0000, valid Micro-F1 = 1.0000, Test Micro-F1 = 0.8846
best valid Micro-F1 is 1.0000, best test is 0.8846 and epoch_id is 18

 Epoch 25 starts !
--------------------------------------------------------------------------------
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Loss values:  0.18455 1.18867 0.0465
Final loss:  0.34992
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated no

learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Evaluating Epoch 35, time 0.065
train Micro-F1 = 1.0000, valid Micro-F1 = 0.5000, Test Micro-F1 = 0.8077
best valid Micro-F1 is 1.0000, best test is 0.8846 and epoch_id is 18

 Epoch 36 starts !
--------------------------------------------------------------------------------
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Loss values:  0.08735 1.54136 0.06375
Final loss:  0.30523
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Evaluating Epoch 36, time 0.061
train Micro-F1 = 1.0000, valid Micro-F1 = 0.5000, Test Micro-F1 = 0.8077
best valid Micro-F1 is 1.0000, best t

Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Evaluating Epoch 47, time 0.065
train Micro-F1 = 1.0000, valid Micro-F1 = 0.7500, Test Micro-F1 = 0.8077
best valid Micro-F1 is 1.0000, best test is 0.8846 and epoch_id is 18

 Epoch 48 starts !
--------------------------------------------------------------------------------
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Loss values:  0.06109 1.2207 0.04861
Final loss:  0.23176
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Evaluating Epoch 48,

learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Loss values:  0.0391 1.34466 0.06014
Final loss:  0.23371
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Evaluating Epoch 59, time 0.062
train Micro-F1 = 1.0000, valid Micro-F1 = 0.5000, Test Micro-F1 = 0.8077
best valid Micro-F1 is 1.0000, best test is 0.8846 and epoch_id is 18

 Epoch 60 starts !
--------------------------------------------------------------------------------
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Loss values:  0.09454 1.16703 0.04145
Final loss:  0.2527
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4

Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Loss values:  0.04809 1.14376 0.04897
Final loss:  0.21143
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Evaluating Epoch 71, time 0.067
train Micro-F1 = 1.0000, valid Micro-F1 = 0.5000, Test Micro-F1 = 0.8077
best valid Micro-F1 is 1.0000, best test is 0.8846 and epoch_id is 18

 Epoch 72 starts !
--------------------------------------------------------------------------------
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Loss values:  0.055

learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Loss values:  0.06641 1.28376 0.04163
Final loss:  0.23641
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Evaluating Epoch 82, time 0.066
train Micro-F1 = 1.0000, valid Micro-F1 = 1.0000, Test Micro-F1 = 0.8077
best valid Micro-F1 is 1.0000, best test is 0.8846 and epoch_id is 18

 Epoch 83 starts !
--------------------------------------------------------------------------------
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Loss values:  0.06221 1.25963 0.04192
Final loss:  0.23009
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has

Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Evaluating Epoch 93, time 0.068
train Micro-F1 = 1.0000, valid Micro-F1 = 0.7500, Test Micro-F1 = 0.8846
best valid Micro-F1 is 1.0000, best test is 0.8846 and epoch_id is 18

 Epoch 94 starts !
--------------------------------------------------------------------------------
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Loss values:  0.11254 1.27862 0.04677
Final loss:  0.28717
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Evaluating Epoch 94

learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Loss values:  0.01839 1.25763 0.04819
Final loss:  0.19234
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Evaluating Epoch 105, time 0.064
train Micro-F1 = 1.0000, valid Micro-F1 = 1.0000, Test Micro-F1 = 0.8846
best valid Micro-F1 is 1.0000, best test is 0.8846 and epoch_id is 18

 Epoch 106 starts !
--------------------------------------------------------------------------------
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Loss values:  0.02088 1.46661 0.0459
Final loss:  0.21344
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph ha

Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Evaluating Epoch 117, time 0.067
train Micro-F1 = 1.0000, valid Micro-F1 = 1.0000, Test Micro-F1 = 0.8846
best valid Micro-F1 is 1.0000, best test is 0.8846 and epoch_id is 18

 Epoch 118 starts !
--------------------------------------------------------------------------------
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Loss values:  0.02929 1.39475 0.03964
Final loss:  0.20841
Level 0 has 156 edges.
Origin graph has 34 nodes, 0 isolated nodes
Pooled graph has 4 nodes, 3 ego nodes, 1 remaining nodes
Level 1 has 12 edges.
learned embedding shape:4, transform from 4 to 34
after, embedding shape: 34
Evaluating Epoch 