In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import torch
from args import parse_args
from baselines import *
from data_utils import (
    get_data_stat,
    get_natural_imbalanced_split_data,
    get_step_imbalanced_split_data,
    load_data,
)
from bat import BatAugmenter
from trainer import NodeClassificationTrainer
from utils import get_model, get_device, print_centered

  from .autonotebook import tqdm as notebook_tqdm


# Running Experiment

This notebook runs experiments on all combinations of the following settings (specified by the global variables below):

| Setting         | (Default) Values                           | Description                                                       |
| --------------- | ------------------------------------------ | ----------------------------------------------------------------- |
| `DATASET_SPACE` | `['cora', 'citeseer', 'pubmed']`           | The datasets to use.                                              |
| `IMB_SPACE`     | `{'step': [10, 20], 'natural': [50, 100]}` | The imbalance types and ratios.                                   |
| `MODE_SPACE`    | `['dummy', 'bat0', 'bat1']`                | The BAT modes to test; `dummy` means no topological augmentation. |

For other settings, we use the default values specified in `config.yaml`.

In [8]:
""" Experiment Setup """

import sys

# Overwrite argv with a single empty entry before parsing, so that
# `parse_args()` falls back to its defaults instead of seeing the arguments
# the notebook kernel was started with.
# NOTE(review): presumably `parse_args` reads `sys.argv` — confirm in args.py.
sys.argv = [""]
args = parse_args()

# Experiment grid: the experiment cell below iterates over every combination.
DATASET_SPACE = ["citeseer"]  # dataset names
MODE_SPACE = ["dummy", "bat1"]  # BAT modes handed to `BatAugmenter(mode=...)`
IMB_SPACE = dict(step=[10], natural=[50])  # imbalance type -> imbalance ratios

In [9]:
def _load_imbalanced_data(args, seed, device):
    """Load `args.dataset` and apply the imbalanced split chosen by `args.imb_type`.

    Args:
        args: Parsed arguments; reads `dataset`, `imb_type`, `imb_ratio` and `debug`.
        seed: Random seed forwarded to the split function as `random_seed`.
        device: Device forwarded to `load_data` as `to_device`.

    Returns:
        The value returned by `get_data_stat(..., store_in_data=True)` for the split data.

    Raises:
        ValueError: If `args.imb_type` is neither 'step' nor 'natural'.
    """
    data = load_data(args.dataset, to_device=device, verbose=args.debug)
    splitters = {
        "step": get_step_imbalanced_split_data,
        "natural": get_natural_imbalanced_split_data,
    }
    if args.imb_type not in splitters:
        raise ValueError(
            f"imb_type must be one of ['step', 'natural'], got {args.imb_type}."
        )
    data = splitters[args.imb_type](
        data,
        imbratio=args.imb_ratio,
        random_seed=seed,
        verbose=args.debug,
    )
    return get_data_stat(data, store_in_data=True, verbose=args.debug)


def _format_avg_test_performance(best_results, metric_names, n_runs):
    """Build the one-line summary of test scores averaged over independent runs.

    Args:
        best_results: One entry per run, indexed as `result[metric]["test"]`.
        metric_names: Iterable of metric keys to report.
        n_runs: Number of runs, shown in the summary header.

    Returns:
        A string with, per metric, the mean test score (scaled by 100) and the
        standard deviation divided by sqrt(number of runs). `ndarray.std()` is
        called with its defaults, i.e. the population standard deviation.
    """
    info = f"Avg Test Performance ({n_runs} runs): "
    for metric in metric_names:
        scores = np.array([result[metric]["test"] * 100 for result in best_results])
        std_err = scores.std() / (len(scores) ** 0.5)
        info += f" | {metric.upper()}: {scores.mean():.2f} ± {std_err:.2f}"
    return info


device = get_device(args.gpu_id)
log_width = 100

print(
    f"Run experiment with\n"
    f"  - Datasets:        {DATASET_SPACE}\n"
    f"  - BAT modes:      {MODE_SPACE}\n"
    f"  - Imbalance types: {IMB_SPACE}\n"
)

# dump all parsed arguments, with keys padded to a common width
print_centered("Arguments", 40, fillchar="=")
kwlen = max(len(key) for key in vars(args)) + 1
for key, value in vars(args).items():
    print(f"{key:{kwlen}}: {value}")
print_centered("", 40, fillchar="=")

# Fail early: with no runs there is nothing to average, and the summary below
# would otherwise read `trainer` before it is ever assigned (or pick up a stale
# one left over in the kernel).
if args.n_runs < 1:
    raise ValueError(f"n_runs must be >= 1, got {args.n_runs}.")

# run the experiment

for imb_type, imb_ratios in IMB_SPACE.items():  # loop over imbalance types

    for imb_ratio in imb_ratios:  # loop over imbalance ratios

        print_centered(
            f"Experiment: Imbalance Type [{imb_type.title()}] - Ratio [{imb_ratio}]",
            log_width,
            fillchar="/",
            prefix="\n",
        )

        for dataset in DATASET_SPACE:  # loop over datasets

            print_centered(
                f"Dataset [{dataset.title()}] - Independent Runs [{args.n_runs}]", log_width, fillchar="=", prefix="\n"
            )

            # the current grid point is written into `args`, which the helpers read from
            args.imb_type = imb_type
            args.imb_ratio = imb_ratio
            args.dataset = dataset

            for bat_mode in MODE_SPACE:  # loop over BAT modes

                print_centered(
                    f"Setting: Dataset [{args.dataset.title()}] - {args.imb_type.title()}IR [{args.imb_ratio}] - BAT [{bat_mode}]",
                    log_width,
                    fillchar="=",
                )

                best_results = []
                for i_run in range(1, args.n_runs + 1):
                    # each independent run gets its own seed, shared by the data
                    # split, the augmenter and the trainer
                    seed = args.seed + i_run

                    # load imbalanced data
                    data = _load_imbalanced_data(args, seed, device)

                    # initialize model
                    model = get_model(
                        gnn_arch=args.gnn_arch,
                        feat_dim=data.n_feat,
                        hid_dim=args.hid_dim,
                        out_dim=data.n_class,
                        n_layer=args.n_layer,
                        device=device,
                    )
                    # BAT augmenter
                    augmenter = BatAugmenter(mode=bat_mode, random_state=seed)
                    # trainer
                    trainer = NodeClassificationTrainer(
                        model=model,
                        data=data,
                        device=device,
                        augmenter=augmenter,  # BAT augmentation, to disable, set augmenter=None
                        learning_rate=args.lr,
                        weight_decay=args.weight_decay,
                        train_epoch=args.epochs,
                        early_stop_patience=args.early_stop,
                        eval_freq=1,
                        verbose_freq=None,
                        enable_tqdm=args.tqdm,
                        random_state=seed,
                    )
                    # train the GNN with BAT augmentation
                    best_model = trainer.train()
                    # print best results
                    print(f'Run {i_run}: ', end='')
                    trainer.print_best_results()
                    # save best results
                    best_results.append(trainer.best_eval_results)

                # print the average performance of the best model; `trainer` is
                # the one from the last run of this setting (n_runs >= 1 is
                # guaranteed by the check above)
                print(
                    _format_avg_test_performance(
                        best_results, trainer.eval_metrics.keys(), args.n_runs
                    )
                )

Now using GPU #0: NVIDIA GeForce RTX 3050 Laptop GPU
Run experiment with
  - Datasets:        ['citeseer']
  - BAT modes:      ['dummy', 'bat1']
  - Imbalance types: {'step': [10], 'natural': [50]}

gpu_id       : 0
seed         : 42
n_runs       : 3
debug        : False
dataset      : cora
imb_type     : step
imb_ratio    : 10
gnn_arch     : SAGE
n_layer      : 3
hid_dim      : 256
lr           : 0.01
weight_decay : 0.0005
epochs       : 500
early_stop   : 50
tqdm         : False
bat_mode     : all

////////////////////////// Experiment: Imbalance Type [Step] - Ratio [10] //////////////////////////



 21%|██        | 104/500 [00:14<00:55,  7.11it/s]


Run 1: Best Epoch:   55 | train/val/test | ACC: 100.0/38.00/37.10 | BACC: 100.0/39.82/40.64 | MACRO-F1: 100.0/31.03/32.82 | aug time: 0.00ms 


 24%|██▍       | 120/500 [00:16<00:52,  7.29it/s]


Run 2: Best Epoch:   71 | train/val/test | ACC: 100.0/39.00/36.00 | BACC: 100.0/39.70/38.33 | MACRO-F1: 100.0/30.68/29.14 | aug time: 0.00ms 


 14%|█▍        | 71/500 [00:09<00:59,  7.23it/s]


Run 3: Best Epoch:   22 | train/val/test | ACC: 100.0/35.60/33.70 | BACC: 100.0/36.52/36.78 | MACRO-F1: 100.0/25.95/24.83 | aug time: 0.00ms 
Avg Test Performance (3 runs):  | ACC: 35.60 ± 0.82 | BACC: 38.59 ± 0.92 | MACRO-F1: 28.93 ± 1.89


 21%|██        | 104/500 [00:18<01:10,  5.64it/s]


Run 1: Best Epoch:   55 | train/val/test | ACC: 100.0/46.80/47.50 | BACC: 100.0/45.63/48.64 | MACRO-F1: 100.0/43.95/46.98 | aug time: 31.28ms 


 19%|█▉        | 97/500 [00:17<01:11,  5.62it/s]


Run 2: Best Epoch:   48 | train/val/test | ACC: 100.0/61.80/64.60 | BACC: 100.0/58.08/62.16 | MACRO-F1: 100.0/57.05/61.21 | aug time: 30.57ms 


 27%|██▋       | 134/500 [00:23<01:04,  5.65it/s]


Run 3: Best Epoch:   85 | train/val/test | ACC: 100.0/56.60/57.40 | BACC: 100.0/54.59/55.20 | MACRO-F1: 100.0/53.44/54.58 | aug time: 30.67ms 
Avg Test Performance (3 runs):  | ACC: 56.50 ± 4.05 | BACC: 55.33 ± 3.19 | MACRO-F1: 54.26 ± 3.36

//////////////////////// Experiment: Imbalance Type [Natural] - Ratio [50] ////////////////////////



 22%|██▏       | 109/500 [00:18<01:04,  6.04it/s]


Run 1: Best Epoch:   60 | train/val/test | ACC: 100.0/47.90/47.53 | BACC: 100.0/41.24/41.00 | MACRO-F1: 100.0/33.25/33.26 | aug time: 0.00ms 


 22%|██▏       | 109/500 [00:18<01:06,  5.90it/s]


Run 2: Best Epoch:   60 | train/val/test | ACC: 100.0/44.87/44.44 | BACC: 100.0/38.79/38.43 | MACRO-F1: 100.0/31.64/31.39 | aug time: 0.00ms 


 20%|█▉        | 98/500 [00:16<01:07,  5.99it/s]


Run 3: Best Epoch:   49 | train/val/test | ACC: 100.0/47.47/46.23 | BACC: 100.0/41.10/40.03 | MACRO-F1: 100.0/33.48/32.20 | aug time: 0.00ms 
Avg Test Performance (3 runs):  | ACC: 46.07 ± 0.73 | BACC: 39.82 ± 0.61 | MACRO-F1: 32.28 ± 0.44


 33%|███▎      | 167/500 [00:34<01:09,  4.82it/s]


Run 1: Best Epoch:  118 | train/val/test | ACC: 100.0/61.37/60.93 | BACC: 100.0/56.20/56.06 | MACRO-F1: 100.0/55.00/55.29 | aug time: 31.82ms 


 30%|██▉       | 148/500 [00:30<01:12,  4.85it/s]


Run 2: Best Epoch:   99 | train/val/test | ACC: 100.0/50.31/50.31 | BACC: 100.0/47.67/47.72 | MACRO-F1: 100.0/48.38/48.27 | aug time: 31.51ms 


 27%|██▋       | 134/500 [00:27<01:16,  4.80it/s]

Run 3: Best Epoch:   85 | train/val/test | ACC: 100.0/59.46/60.93 | BACC: 100.0/55.39/56.63 | MACRO-F1: 100.0/55.84/57.00 | aug time: 31.92ms 
Avg Test Performance (3 runs):  | ACC: 57.39 ± 2.89 | BACC: 53.47 ± 2.35 | MACRO-F1: 53.52 ± 2.18



