# Perform Source Detection

## Graph Information
Firstly, we have a highSchool graph with 774 nodes and 7992 edges, where nodes represent students and edges represent interactions. The network is undirected and unweighted.
- The graph is located in `./Conformalized-Network-Source-Detection/SD-STGCN/dataset/highSchool/data/graph/highSchool.edgelist`


## Overview of the dataset
- The datasets are in `./Conformalized-Network-Source-Detection/SD-STGCN/dataset/highSchool/data/SIR`
- Each data pickle contains a number of simulations (21200) of an SIR model on the highSchool network
- Some of the nodes are infected at the beginning of the simulation; these are the source nodes and are labeled 1, while the rest are labeled 0.
    - This means we have a highly imbalanced dataset: most nodes are not source nodes (labeled 0), and only a few (<20) nodes are source nodes (labeled 1).

- Each simulation is a time series of the number of infected nodes, recorded for up to T=30 time steps
    - During training, the first `skip` snapshots are ignored, and only nf=16 snapshots are observed

**Goal**
- The goal is to predict the source node of the infection, among the 774 nodes in the network, based on the time series of infected nodes.
    

- The data is split into training, validation, and test sets

## Data Structure
```python
- data (tuple): The dataset tuple containing:
    - data[0]: Time series (shape: [21200, 3, 774]).
            torch.Tensor: A PyTorch tensor of shape [21200, 3, 774], where:
                    - 21200: Number of simulations.
                    - 3: Feature vector dimensions [t0, t1, t2].
                    - 774: Number of nodes per simulation.
    - data[1]: Labels (shape: [21200, 774]). # the labels of the nodes in the network
            torch.Tensor: A PyTorch tensor of shape [21200, 774], where:
                    - 21200: Number of simulations.
                    - 774: Number of nodes per simulation.
                    - Each value is either 0 (not a source node) or 1 (source node).
    - data[2]: Skip values (shape: [21200, 1]). # number of time steps to skip
            torch.Tensor: A PyTorch tensor of shape [21200, 1], where:
                    - 21200: Number of simulations.
                    - Each value indicates the number of time steps to skip before starting the prediction.
                    - Currently of no use, as we will include this when we process the data.

```


In [1]:
import torch
import torch.nn as nn

from torch_geometric.data import Dataset
from torch_geometric.loader import DataLoader

import sys, os
from base_models import GCN, GAT, GraphSAGE, GIN, MLP
from logger import PCB_Logger
from utils import *

In [2]:
import torch.cuda as sceptor  # `sceptor` is just a local alias for torch.cuda

# Print whether torch reports CUDA as available in this environment.
print(sceptor.is_available())

True


# Dataloaders with Feature Processing
- Load the time series simulation data and the labels
- Process the time series to be our desired feature

In [None]:
# Custom dataset
class SIRDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs each input entry with its target.

    Args:
        X: Sized, indexable collection of inputs.
        y: Indexable collection of targets, aligned with ``X`` by position.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        # The dataset size is defined by the inputs alone.
        n_samples = len(self.X)
        return n_samples

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

In [None]:
# Define a custom dataset to load graphs from the train/test directories
class highSchoolDataset(Dataset):
    """Dataset that lazily loads torch-saved graphs from a directory.

    Every ``*.pt`` file directly inside ``dataset_dir`` is one sample; it is read with
    ``torch.load`` each time it is accessed.

    Args:
        dataset_dir (str): Directory containing the ``.pt`` graph files.
        transform (callable, optional): Applied to each loaded graph before it is returned.

    Attributes:
        graph_files (list[str]): Sorted file names of the discovered graphs.
        file_name (list[str]): Alias of ``graph_files``, kept for backward compatibility.
    """

    def __init__(self, dataset_dir, transform=None):
        super().__init__()
        self.dataset_dir = dataset_dir
        # Sorted so the index -> file mapping is deterministic (os.listdir order is arbitrary).
        self.graph_files = sorted(f for f in os.listdir(dataset_dir) if f.endswith('.pt'))
        # Previously the list was stored only under this name while __len__/__getitem__
        # read `graph_files`, which raised AttributeError on first use.
        self.file_name = self.graph_files
        self.transform = transform

    def __len__(self):
        return len(self.graph_files)

    def __getitem__(self, idx):
        # Load torch-saved graph
        graph_path = os.path.join(self.dataset_dir, self.graph_files[idx])
        graph = torch.load(graph_path)

        # Apply optional transformations
        if self.transform:
            graph = self.transform(graph)

        return graph


def get_data_loaders(dataset_name, dataset_dir, batch_size=4):
    """
    Build the train and test DataLoaders for one of the supported datasets.

    Graphs are read from ``<dataset_dir>/Graph-<X>/graphs/{train,test}``, where ``<X>`` is
    the upper-cased first letter of ``dataset_name``.

    Args:
        dataset_name (str): 'wacv' or 'fpic' (matched case-insensitively).
        dataset_dir (str): Root directory that holds the ``Graph-*`` folders.
        batch_size (int): Batch size used by both loaders.

    Returns:
        tuple: (train_loader, test_loader); only the training loader shuffles.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names.
    """
    supported = ('wacv', 'fpic')
    if dataset_name.lower() not in supported:
        raise ValueError("Invalid dataset name. Choose either 'wacv' or 'fpic'.")

    graph_root = os.path.join(dataset_dir, f'Graph-{dataset_name[0].upper()}', 'graphs')

    # Both datasets are created first, then wrapped, train before test.
    train_set = GraphDataset(os.path.join(graph_root, 'train'))
    test_set = GraphDataset(os.path.join(graph_root, 'test'))

    loaders = (
        DataLoader(train_set, batch_size=batch_size, shuffle=True),
        DataLoader(test_set, batch_size=batch_size, shuffle=False),
    )
    return loaders


In [10]:
import glob
import pandas as pd

def generate_file_csvs(base_dirs):
    """
    Write one CSV per dataset listing each graph file with its image and mask paths.

    For every ``name -> base_dir`` entry, the ``.pt`` files directly inside
    ``<base_dir>/graphs/train`` and ``<base_dir>/graphs/test`` are collected (train first,
    sorted within each split). Image and mask paths are derived from the graph's base
    file name and are not checked for existence. The table is saved to
    ``<base_dir>/<name>_file_list.csv``.

    Args:
        base_dirs (dict): Mapping from dataset name to base directory path.
    """
    # Fixed column set so that a dataset without any graphs still yields a CSV with a header.
    columns = ["pt_file", "image_file", "mask_file"]

    for name, base_dir in base_dirs.items():
        graph_dir = os.path.join(base_dir, "graphs")
        # Only files directly inside the train/ and test/ subfolders are matched
        # (the pattern has no "**", so the search is not recursive).
        pt_files = []
        for split in ("train", "test"):
            pt_files.extend(sorted(glob.glob(os.path.join(graph_dir, split, "*.pt"))))
        print(f"Found {len(pt_files)} .pt files in {graph_dir} (train/test subfolders included)")

        image_dir = os.path.join(base_dir, "images")
        mask_dir = os.path.join(base_dir, "masks")
        # "Graph-W" pairs .jpg images with .xml masks; every other dataset uses .png for both.
        image_ext, mask_ext = (".jpg", ".xml") if name == "Graph-W" else (".png", ".png")

        rows = []
        for pt_path in pt_files:
            base_fname = os.path.splitext(os.path.basename(pt_path))[0]
            rows.append({
                "pt_file": pt_path,
                "image_file": os.path.join(image_dir, base_fname + image_ext),
                "mask_file": os.path.join(mask_dir, base_fname + mask_ext),
            })

        df = pd.DataFrame(rows, columns=columns)
        csv_path = os.path.join(base_dir, f"{name}_file_list.csv")
        df.to_csv(csv_path, index=False)
        print(f"Saved: {csv_path}")

# Dataset name -> base directory (paths are relative to the current working directory).
base_dirs = {
    "Graph-W": "./data/GraphPCB/Graph-W/",
    "Graph-F": "./data/GraphPCB/Graph-F/"
}
# Writes "<name>_file_list.csv" into each base directory.
generate_file_csvs(base_dirs)

Found 47 .pt files in ./data/GraphPCB/Graph-W/graphs (train/test subfolders included)
Saved: ./data/GraphPCB/Graph-W/Graph-W_file_list.csv
Found 162 .pt files in ./data/GraphPCB/Graph-F/graphs (train/test subfolders included)
Saved: ./data/GraphPCB/Graph-F/Graph-F_file_list.csv


# Inference

In [18]:
def inference(model, test_loader):
    """
    Run the model on every graph of ``test_loader.dataset`` and collect node predictions.

    Graphs are taken one at a time straight from the underlying dataset (the loader's
    batching is not used), so each prediction can be tied to its file name through
    ``dataset.graph_files``.

    Args:
        model: Module called as ``model(x, edge_index)``; the argmax over dim 1 of its
            output is used as the predicted class of each row.
        test_loader: Object whose ``dataset`` supports ``len``/indexing and exposes
            ``graph_files``; each item must provide ``to``, ``x``, ``edge_index`` and ``y``.

    Returns:
        tuple: ``(all_preds, all_labels, predictions)`` where the first two are NumPy
        arrays concatenated over all graphs and ``predictions`` is a list of
        ``{"graph_id": file_name, "labels": [...]}`` dicts, one per graph.
    """
    model.eval()
    # The model's device does not change during evaluation, so look it up once.
    device = next(model.parameters()).device
    dataset = test_loader.dataset

    all_preds, all_labels = [], []
    predictions = []

    with torch.no_grad():
        for graph_idx in range(len(dataset)):
            graph = dataset[graph_idx].to(device)
            graph_file_name = dataset.graph_files[graph_idx]

            # Forward pass
            logits = model(graph.x, graph.edge_index)
            preds = logits.argmax(dim=1)

            all_preds.append(preds.cpu())
            all_labels.append(graph.y.cpu())

            predictions.append({
                "graph_id": graph_file_name,
                "labels": preds.tolist()
            })

    all_preds = torch.cat(all_preds, dim=0)
    all_labels = torch.cat(all_labels, dim=0)

    return all_preds.numpy(), all_labels.numpy(), predictions

# Training Function
- Every 10 epochs (and after the final epoch), evaluate on the test set
- save the training log into a .txt
- save the last checkpoint
- save the predictions into a .json

In [16]:
def train_step(model, train_loader, optimizer, scheduler, device):
    """
    Run one pass over ``train_loader`` and return the mean per-batch loss.

    Args:
        model: Module called as ``model(x, edge_index)``.
        train_loader: Iterable of batches exposing ``to``, ``sort``, ``x``, ``edge_index``, ``y``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Learning-rate scheduler stepped once per call, after all batches.
        device: Device each batch is moved to.

    Returns:
        float: Sum of the batch losses divided by the number of batches (at least 1).
    """
    model.train()
    running_loss = 0

    for graph_batch in train_loader:
        graph_batch = graph_batch.to(device)
        graph_batch = graph_batch.sort(sort_by_row=False)

        targets = graph_batch.y
        node_scores = model(graph_batch.x, graph_batch.edge_index)
        batch_loss = compute_loss(node_scores, targets)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    # The schedule advances once per call (epoch), not once per batch.
    scheduler.step()

    # max(1, ...) keeps the division defined when the loader has no batches.
    n_batches = max(1, len(train_loader))
    return running_loss / n_batches


In [6]:
# the main function to train the model
def train_model(model, config):
    """
    Generic function to train different GNN models.

    Builds the data loaders, logger, Adam optimizer and StepLR scheduler from ``config``,
    trains for ``config["num_epochs"]`` epochs, evaluates on the test set every 10 epochs
    and after the last epoch, and saves one checkpoint when training ends.

    Args:
        model: The model to train; it is moved to ``config["device"]``.
        config (dict): Must provide ``dataset``, ``dataset_dir``, ``batch_size``,
            ``home_dir``, ``device``, ``learning_rate``, ``weight_decay``,
            ``scheduler`` (with ``step_size`` and ``gamma``) and ``num_epochs``.
            An optional ``seed`` overrides the default seed of 42.

    Returns:
        tuple: ``(model, metrics, checkpoint_dir)`` where ``metrics`` comes from the
        final evaluation and ``checkpoint_dir`` is ``logger.checkpoint_dir``.

    Raises:
        ValueError: If ``config["num_epochs"]`` is smaller than 1.
    """
    # Fail fast: with no epochs there is no loss or metrics to report or checkpoint.
    # Previously this surfaced only after all setup, as a NameError.
    num_epochs = config["num_epochs"]
    if num_epochs < 1:
        raise ValueError(f"config['num_epochs'] must be >= 1, got {num_epochs}")

    set_seed(config.get("seed", 42))
    train_loader, test_loader = get_data_loaders(config['dataset'], config['dataset_dir'], batch_size=config["batch_size"])

    logger = PCB_Logger(home_dir=config['home_dir'], config=config)

    torch.cuda.empty_cache()

    # Move model to device
    device = config["device"]
    model = model.to(device)

    # Define optimizer & scheduler
    optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'], weight_decay=config['weight_decay'])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=config["scheduler"]["step_size"], gamma=config["scheduler"]["gamma"])

    # Training loop (epoch is 1-based)
    for epoch in range(1, num_epochs + 1):
        avg_loss = train_step(model, train_loader, optimizer, scheduler, device)
        logger.log(f"Epoch {epoch:03d}, Loss: {avg_loss:.10f}")
        # Evaluate every 10 epochs OR at the last epoch; the last epoch always
        # evaluates, so `metrics` is defined when the loop ends.
        if epoch % 10 == 0 or epoch == num_epochs:
            all_preds, all_labels, predictions = inference(model, test_loader)
            metrics = compute_metrics(all_preds, all_labels)
            logger.update_metrics(metrics, predictions)

    logger.finish_run()
    # save only the final model
    checkpoint_path = os.path.join(logger.checkpoint_dir, f"model_epoch_{epoch}_{avg_loss:.4f}.pth")
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'metrics': metrics,
    }, checkpoint_path)
    logger.log(f"✅ Model checkpoint saved to {checkpoint_path}")

    return model, metrics, logger.checkpoint_dir

# Basic Config Settings

In [11]:
# Set the home directory for the dataset
home_dir = os.path.expanduser("~")
dataset_dir = os.path.join(home_dir, "GraphPCB_Analysis/GraphPCB")
print("Dataset directory:", dataset_dir)
# Verify that the directory exists: warn here rather than fail later inside the data loading.
if not os.path.isdir(dataset_dir):
    print(f"WARNING: dataset directory does not exist: {dataset_dir}")

Dataset directory: /home/lantian/GraphPCB_Analysis/GraphPCB


In [14]:
# Basic configuration
# Shared defaults; each experiment cell below overrides entries with config.update(...).
# NOTE: "batch_size" is not defined here although train_model reads config["batch_size"],
# so an experiment cell has to set it before training.
config = {
    "experiment_name": "",  # filled in per experiment
    "dataset_dir": dataset_dir,
    "home_dir": home_dir,
    "dataset": "fpic",  # passed to get_data_loaders ('wacv' or 'fpic')
    "device": "cuda:0",

    # model architecture
    "model": "",  # filled in per experiment
    "input_dim": 1024,
    "hidden_dim": 256,
    "output_dim": 4,

    # regularization
    "dropout": 0.3,
    "use_batchnorm": True,
    "use_bias": False,
    "weight_decay": 1e-3,
    # train_model reads "step_size" and "gamma" to build a StepLR scheduler.
    "scheduler": {"type": "StepLR", "step_size": 30, "gamma": 0.5},

    # training parameters
    "learning_rate": 0.001,
    "num_epochs": 200, 
}

# MLP

In [19]:
# Identifiers that make up the experiment name.
run_num = 0
notes = ""
model_name = "MLP"
num_layers = 2
loss_type = "NLL" # NLL | Focal | Contrast
exp_name = f"{model_name}{num_layers}-{loss_type}_{notes}_{run_num}"


# Override the shared defaults for this run; keys not listed keep their current value.
config.update({
    "experiment_name": exp_name,
    "device": "cuda",
    "dataset": "WACV",  # get_data_loaders lower-cases the name before checking it
    "batch_size": 4,
    "loss_type": loss_type,
    "model": model_name,
    "input_dim": 1024,
    "hidden_dim": 256,
    "output_dim": 4,
    "num_layers": num_layers,
    "dropout": 0.3,
    "use_batchnorm": True,
    "use_skip": False,
    "num_epochs": 200,
    "learning_rate": 0.0001,
    "weight_decay": 1e-3,
    "scheduler": {"type": "StepLR", "step_size": 10, "gamma": 0.5}
})

# Set seed before training
# (seeding here, before the model is built, also covers its weight initialisation)
set_seed(42)

# Instantiate the MLP model
model = MLP(
    input_dim=config["input_dim"],
    hidden_dim=config["hidden_dim"],
    output_dim=config["output_dim"],
    num_layers=config["num_layers"],
    dropout=config["dropout"],
    use_batchnorm=config["use_batchnorm"],
    use_skip=config["use_skip"]
)

# Train the MLP model
# The returned (model, metrics, checkpoint_dir) tuple is not captured here.
train_model(model, config)

Checkpoint directory: /home/lantian/PCB_Analysis/WACV-trained/MLP2-NLL__0
Experiment Configuration:
experiment_name: MLP2-NLL__0
dataset_dir: /home/lantian/GraphPCB_Analysis/GraphPCB
home_dir: /home/lantian
dataset: WACV
device: cuda
model: MLP
input_dim: 1024
hidden_dim: 256
output_dim: 4
dropout: 0.3
use_batchnorm: True
use_bias: False
weight_decay: 0.001
scheduler: {'type': 'StepLR', 'step_size': 10, 'gamma': 0.5}
learning_rate: 0.0001
num_epochs: 200
batch_size: 4
loss_type: NLL
num_layers: 2
use_skip: False
Using device: cuda
Loading dataset: WACV
Results will be saved to /home/lantian/PCB_Analysis/WACV-trained/MLP2-NLL__0.
Epoch 001, Loss: 1.2283591628
Epoch 002, Loss: 0.9837212026
Epoch 003, Loss: 0.8397917867
Epoch 004, Loss: 0.7928972423
Epoch 005, Loss: 0.7206747353
Epoch 006, Loss: 0.6670337737
Epoch 007, Loss: 0.6535654843
Epoch 008, Loss: 0.5922815681
Epoch 009, Loss: 0.5174355954
Epoch 010, Loss: 0.5518867254
  F1-Score (macro): 0.4547791356
  Weighted F1: 0.9132314978
  

(MLP(
   (layers): ModuleList(
     (0): Linear(in_features=1024, out_features=256, bias=True)
     (1): Linear(in_features=256, out_features=4, bias=True)
   )
   (bns): ModuleList(
     (0): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
   )
   (dropout): Dropout(p=0.3, inplace=False)
 ),
 {'f1_macro': 0.497485367232242,
  'weighted_f1': 0.9441108208847313,
  'f1_3_class': 0.6761229746631521,
  'f1_per_class': [0.5894736842105263,
   0.37894736842105264,
   0.05813953488372093,
   0.9633808814136683],
  'precision_per_class': [0.46473029045643155,
   0.24161073825503357,
   0.033783783783783786,
   0.9988962472406181],
  'recall_per_class': [0.8057553956834532,
   0.8780487804878049,
   0.20833333333333334,
   0.9303042763157895],
  'confusion_matrix': [[112, 21, 1, 5],
   [4, 36, 1, 0],
   [5, 14, 5, 0],
   [120, 78, 141, 4525]]},
 '/home/lantian/PCB_Analysis/WACV-trained/MLP2-NLL__0')

# GCN

In [20]:
run_num = 0
loss_type = "NLL"

# Only the entries listed here change; everything else keeps the value currently in config.
config.update({
    "experiment_name": f"GCN-{loss_type}_{run_num}",

    "dataset": "fpic",  # or "wacv"
    "batch_size": 4,
    # Record the loss explicitly so it always matches the experiment name instead of
    # silently inheriting whatever an earlier cell left in config.
    "loss_type": loss_type,
    "model": "GCN",

    "hidden_dim": 256,
    "num_layers": 2,
    "learning_rate": 0.001,
})

# Seed before building the model so its initial weights are reproducible
# (train_model only re-seeds after the model already exists).
set_seed(42)

model = GCN(
    in_dim=config["input_dim"],
    hidden_dim=config["hidden_dim"],
    out_dim=config["output_dim"],
    num_layers=config["num_layers"],
    dropout=config["dropout"],
    use_batchnorm=config["use_batchnorm"]
)
print(config['dataset_dir'])

model, metrics, checkpoint_dir = train_model(model, config)

/home/lantian/GraphPCB_Analysis/GraphPCB
Checkpoint directory: /home/lantian/PCB_Analysis/FPIC-trained/GCN-NLL_0
Experiment Configuration:
experiment_name: GCN-NLL_0
dataset_dir: /home/lantian/GraphPCB_Analysis/GraphPCB
home_dir: /home/lantian
dataset: fpic
device: cuda
model: GCN
input_dim: 1024
hidden_dim: 256
output_dim: 4
dropout: 0.3
use_batchnorm: True
use_bias: False
weight_decay: 0.001
scheduler: {'type': 'StepLR', 'step_size': 10, 'gamma': 0.5}
learning_rate: 0.001
num_epochs: 200
batch_size: 4
loss_type: NLL
num_layers: 2
use_skip: False
Using device: cuda
Loading dataset: fpic
Results will be saved to /home/lantian/PCB_Analysis/FPIC-trained/GCN-NLL_0.
Epoch 001, Loss: 1.3075777416
Epoch 002, Loss: 1.1169439534
Epoch 003, Loss: 1.0748033976
Epoch 004, Loss: 1.0366612591
Epoch 005, Loss: 1.0391599988
Epoch 006, Loss: 0.9694701454
Epoch 007, Loss: 1.0032139790
Epoch 008, Loss: 1.0295051788
Epoch 009, Loss: 0.9906845997
Epoch 010, Loss: 0.9263791791
  F1-Score (macro): 0.3556008

# GAT

In [64]:
run_num = 0
loss_type = "NLL"

# Override the shared defaults for the GAT run.
config.update({
    "experiment_name": f"GAT-{loss_type}_{run_num}",
    "device": "cuda",
    "dataset": "fpic",  # or "wacv"
    "batch_size": 4,
    "loss_type": loss_type,
    "model": "GAT",
    "input_dim": 1024,
    "hidden_dim": 1024,
    "output_dim": 4,
    "num_heads": 4,
    "num_layers": 2,
    "dropout": 0.3,
    "use_batchnorm": True,
    "use_bias": False,
    "num_epochs": 200,
    "learning_rate": 0.0001,
    "weight_decay": 1e-3,
    "scheduler": {"type": "StepLR", "step_size": 20, "gamma": 0.5}
})

# Seed before building the model so its initial weights are reproducible
# (train_model only re-seeds after the model already exists).
set_seed(42)

# Instantiate the model
model = GAT(
    in_dim=config["input_dim"],
    hidden_dim=config["hidden_dim"],
    out_dim=config["output_dim"],
    num_layers=config["num_layers"],
    num_heads=config["num_heads"],
    dropout=config["dropout"],
    use_batchnorm=config["use_batchnorm"]
)


model, metrics, checkpoint_dir = train_model(model, config)

Checkpoint directory: /home/lantian/PCB_Analysis/FPIC-trained/GAT-NLL_0
Experiment Configuration:
experiment_name: GAT-NLL_0
dataset_dir: /home/lantian/GraphPCB/
home_dir: /home/lantian/
dataset: fpic
device: cuda
model: GAT
input_dim: 1024
hidden_dim: 1024
output_dim: 4
dropout: 0.3
use_batchnorm: True
use_bias: False
weight_decay: 0.001
scheduler: {'type': 'StepLR', 'step_size': 20, 'gamma': 0.5}
learning_rate: 0.0001
num_epochs: 200
batch_size: 4
num_layers: 2
loss_type: NLL
num_heads: 4
Using device: cuda
Loading dataset: fpic
Results will be saved to /home/lantian/PCB_Analysis/FPIC-trained/GAT-NLL_0.
Epoch 001, Loss: 1.3706389912
Epoch 002, Loss: 1.2006024862
Epoch 003, Loss: 1.1642258969
Epoch 004, Loss: 1.0859312477
Epoch 005, Loss: 1.0945229880
Epoch 006, Loss: 1.0387999539
Epoch 007, Loss: 1.0586524893
Epoch 008, Loss: 1.0353469273
Epoch 009, Loss: 1.0305183180
Epoch 010, Loss: 0.9872889848
  F1-Score (macro): 0.2718814467
  Weighted F1: 0.6093899855
  Subset F1-Score (3-class

# GIN

In [66]:

run_num = 0
loss_type = "NLL"

# Override the shared defaults for the GIN run.
config.update({
    # Same text as before ("GIN-<loss>-<run>"); note the "-" separator before the run number.
    "experiment_name": f"GIN-{loss_type}-{run_num}",
    "device": "cuda",
    "dataset": "wacv",  # or "fpic"
    "batch_size": 4,
    # Record the loss explicitly so it always matches the experiment name instead of
    # silently inheriting whatever an earlier cell left in config.
    "loss_type": loss_type,
    "model": "GIN",
    "input_dim": 1024,
    "hidden_dim": 256,
    "output_dim": 4,
    "num_layers": 2,
    "dropout": 0.3,
    "use_batchnorm": True,
    "use_bias": False,
    "num_epochs": 200,
    "learning_rate": 0.0001,
    "weight_decay": 1e-3,
    "scheduler": {"type": "StepLR", "step_size": 20, "gamma": 0.5}
})

# Set seed before training (and before the model's weights are initialised)
set_seed(42)


model = GIN(
    in_dim=config["input_dim"],
    hidden_dim=config["hidden_dim"],
    out_dim=config["output_dim"],
    num_layers=config["num_layers"],
    dropout=config["dropout"],
    use_batchnorm=config["use_batchnorm"]
)

model, metrics, checkpoint_dir = train_model(model, config)


Checkpoint directory: /home/lantian/PCB_Analysis/WACV-trained/GIN-NLL-0
Experiment Configuration:
experiment_name: GIN-NLL-0
dataset_dir: /home/lantian/GraphPCB/
home_dir: /home/lantian/
dataset: wacv
device: cuda
model: GIN
input_dim: 1024
hidden_dim: 256
output_dim: 4
dropout: 0.3
use_batchnorm: True
use_bias: False
weight_decay: 0.001
scheduler: {'type': 'StepLR', 'step_size': 20, 'gamma': 0.5}
learning_rate: 0.0001
num_epochs: 200
batch_size: 4
num_layers: 2
loss_type: NLL
num_heads: 4
Using device: cuda
Loading dataset: wacv
Results will be saved to /home/lantian/PCB_Analysis/WACV-trained/GIN-NLL-0.
Epoch 001, Loss: 1.4157607675
Epoch 002, Loss: 1.3617971659
Epoch 003, Loss: 1.3001927853
Epoch 004, Loss: 1.2760290742
Epoch 005, Loss: 1.2342347741
Epoch 006, Loss: 1.2145321965
Epoch 007, Loss: 1.1660370231
Epoch 008, Loss: 1.0934383810
Epoch 009, Loss: 1.1151570678
Epoch 010, Loss: 1.0912549078
  F1-Score (macro): 0.2648228384
  Weighted F1: 0.8308759421
  Subset F1-Score (3-class)

  precision = np.mean(TP / (TP + FP)) if np.sum(TP + FP) > 0 else 0


Epoch 032, Loss: 0.7702454984
Epoch 033, Loss: 0.9002290040
Epoch 034, Loss: 0.8476024806
Epoch 035, Loss: 0.8175182045
Epoch 036, Loss: 0.8012893081
Epoch 037, Loss: 0.7521019995
Epoch 038, Loss: 0.8073277593
Epoch 039, Loss: 0.7874461174
Epoch 040, Loss: 0.6919680685
  F1-Score (macro): 0.2994430125
  Weighted F1: 0.8359097943
  Subset F1-Score (3-class): 0.5236314984
  F1 per class: [0.25157232704402516, 0.0633147113594041, 0.01973684210526316, 0.8631481695345883]
  Precision per class: [0.16096579476861167, 0.034274193548387094, 0.010714285714285714, 0.9847167325428196]
  Recall per class: [0.5755395683453237, 0.4146341463414634, 0.125, 0.768297697368421]
  Confusion Matrix:
[[80, 18, 10, 31], [7, 17, 2, 15], [3, 6, 3, 12], [407, 455, 265, 3737]]
Epoch 041, Loss: 0.7445536554
Epoch 042, Loss: 0.7448407710
Epoch 043, Loss: 0.7286055714
Epoch 044, Loss: 0.7885140836
Epoch 045, Loss: 0.7903052330
Epoch 046, Loss: 0.6991480350
Epoch 047, Loss: 0.7855380535
Epoch 048, Loss: 0.7634636641

  precision = np.mean(TP / (TP + FP)) if np.sum(TP + FP) > 0 else 0


Epoch 061, Loss: 0.7216233671
Epoch 062, Loss: 0.6473555237
Epoch 063, Loss: 0.6394737780
Epoch 064, Loss: 0.7048819363
Epoch 065, Loss: 0.7000706136
Epoch 066, Loss: 0.7578278631
Epoch 067, Loss: 0.7342059225
Epoch 068, Loss: 0.7042975307
Epoch 069, Loss: 0.6629285365
Epoch 070, Loss: 0.6224938989
  F1-Score (macro): 0.3297853464
  Weighted F1: 0.8832291116
  Subset F1-Score (3-class): 0.6472110218
  F1 per class: [0.274390243902439, 0.11846689895470384, 0.014925373134328358, 0.9113588696324099]
  Precision per class: [0.17408123791102514, 0.06910569105691057, 0.00909090909090909, 0.9840286054827175]
  Recall per class: [0.6474820143884892, 0.4146341463414634, 0.041666666666666664, 0.8486842105263158]
  Confusion Matrix:
[[90, 11, 0, 38], [10, 17, 0, 14], [4, 4, 1, 15], [413, 214, 109, 4128]]
Epoch 071, Loss: 0.7174106300
Epoch 072, Loss: 0.7495146155
Epoch 073, Loss: 0.6997005194
Epoch 074, Loss: 0.6566543639
Epoch 075, Loss: 0.7415800571
Epoch 076, Loss: 0.7201370806
Epoch 077, Loss

# GraphSAGE

## Aggregators

In [21]:
def get_aggregator(aggr_type, in_dim=1024, gate_hidden_dim=128):
    """
    Resolve an aggregation spec into a value usable as a layer's ``aggr`` argument.

    Args:
        aggr_type (str): 'softmax' and 'attn' are turned into learnable aggregation
            modules; any other value (e.g. 'mean', 'sum', 'max') is returned unchanged.
        in_dim (int): Input width of the gate network built for 'attn'. Defaults to
            1024, the value that used to be hard-coded.
        gate_hidden_dim (int): Hidden width of that gate network. Defaults to 128.

    Returns:
        ``aggr.SoftmaxAggregation(learn=True)`` for 'softmax', an
        ``aggr.AttentionalAggregation`` for 'attn', otherwise ``aggr_type`` itself.
    """
    # Plain specs are passed through untouched and need nothing from torch_geometric.
    if aggr_type not in ('softmax', 'attn'):
        return aggr_type

    # Imported locally so the function does not depend on a later cell having run.
    from torch_geometric.nn import aggr

    if aggr_type == 'softmax':
        return aggr.SoftmaxAggregation(learn=True)

    # 'attn': the gate network produces one score per element (output width 1).
    gate_nn = nn.Sequential(
        nn.Linear(in_dim, gate_hidden_dim),
        nn.ReLU(),
        nn.Linear(gate_hidden_dim, 1)
    )
    return aggr.AttentionalAggregation(gate_nn=gate_nn)

In [22]:
from torch_geometric.nn import aggr
run_num = 1
notes = ""
model_name = "GraphSAGE"
num_layers = 2
loss_type = "NLL" # NLL | Focal | Contrast
aggr_type = "softmax" # "mean" | "max" |"sum" | 'std' | 'attn'
# https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html?highlight=torch_geometric+nn+aggr#aggregation-operators
# multi_aggr_type = aggr.MultiAggregation(['mean', 'std', aggr.SoftmaxAggregation(learn=True)])


# Override the shared defaults for the GraphSAGE run.
config.update({
    "experiment_name": f"{model_name}{num_layers}-{loss_type}_{aggr_type}_{notes}_{run_num}",
    "device": "cuda",
    "dataset": "wacv",  # or "fpic"
    "batch_size": 4,
    # Record the loss explicitly so it always matches the experiment name instead of
    # silently inheriting whatever an earlier cell left in config.
    "loss_type": loss_type,

    "model": model_name,
    "input_dim": 1024,
    "hidden_dim": 1024,
    "output_dim": 4,
    "num_layers": num_layers,
    "dropout": 0.3,
    "use_batchnorm": True,
    "use_skip": False,
    "aggr": aggr_type,  # Aggregation type
    "num_epochs": 200,
    "learning_rate": 0.0001,
    "weight_decay": 1e-3,
    "scheduler": {"type": "StepLR", "step_size": 10, "gamma": 0.5}
})

# NOTE(review): this gate network is not passed to anything in this cell —
# get_aggregator builds its own for 'attn'. It is kept in case a later cell uses it.
gate_nn = nn.Sequential(
    nn.Linear(1024, 128),
    nn.ReLU(),
    nn.Linear(128, 1)
)

# Seed before building the model so its initial weights are reproducible and do not
# depend on the modules created above (train_model only re-seeds after the model exists).
set_seed(42)

# Instantiate the 2-layer GraphSAGE model
model = GraphSAGE(
    in_dim=config["input_dim"],
    hidden_dim=config["hidden_dim"],
    out_dim=config["output_dim"],
    num_layers=config["num_layers"],
    dropout=config["dropout"],
    use_batchnorm=config["use_batchnorm"],
    aggr=get_aggregator(config["aggr"]),
    use_skip=config["use_skip"]
)

model, metrics, checkpoint_dir = train_model(model, config)


Checkpoint directory: /home/lantian/PCB_Analysis/WACV-trained/GraphSAGE2-NLL_softmax__1
Experiment Configuration:
experiment_name: GraphSAGE2-NLL_softmax__1
dataset_dir: /home/lantian/GraphPCB_Analysis/GraphPCB
home_dir: /home/lantian
dataset: wacv
device: cuda
model: GraphSAGE
input_dim: 1024
hidden_dim: 1024
output_dim: 4
dropout: 0.3
use_batchnorm: True
use_bias: False
weight_decay: 0.001
scheduler: {'type': 'StepLR', 'step_size': 10, 'gamma': 0.5}
learning_rate: 0.0001
num_epochs: 200
batch_size: 4
loss_type: NLL
num_layers: 2
use_skip: False
aggr: softmax
Using device: cuda
Loading dataset: wacv
Results will be saved to /home/lantian/PCB_Analysis/WACV-trained/GraphSAGE2-NLL_softmax__1.
Epoch 001, Loss: 1.2473115981
Epoch 002, Loss: 0.6990422755
Epoch 003, Loss: 0.4828718498
Epoch 004, Loss: 0.3403182924
Epoch 005, Loss: 0.3070732892
Epoch 006, Loss: 0.2482701361
Epoch 007, Loss: 0.2073400542
Epoch 008, Loss: 0.1797796570
Epoch 009, Loss: 0.1360847808
Epoch 010, Loss: 0.1269039623
