In [1]:
import os
import random
import pickle
from joblib import Parallel, delayed
import glob
import torch
import networkx as nx

# === Standard Library ===
import math
import random
import time
import heapq
import itertools
from collections import defaultdict, deque
from itertools import combinations
from typing import Any, Tuple, Dict, List, Set, Sequence, Union

# === Third-Party Libraries ===

# --- Scientific Computing ---
import numpy as np
import pandas as pd
import scipy.sparse as sp
from scipy.sparse    import coo_matrix
from scipy.optimize import linprog

# --- Plotting ---
import matplotlib.pyplot as plt

# --- Parallel Processing ---
from joblib import Parallel, delayed
from tqdm import tqdm

# --- Graph Processing ---
import networkx as nx

# --- JIT Compilation ---
from numba import njit, prange

from torch_geometric.loader import NeighborLoader
from torch_geometric.utils import from_networkx
from torch_geometric.nn import SAGEConv


# --- Model definition ---
import torch.nn as nn
import torch.nn.functional as F


# 1. Dataset generation

In [None]:
structural_params = {
    'ER': {'p': [0.0443, 0.0667]},
    'BA': {'m': [2, 3]},
    'SW': {'beta': [4, 5]}
}
train_sizes   = [20, 50, 80]
test_sizes    = [100, 200, 300, 500, 1000]
reliability_p = [i/10 for i in range(1, 11)]  # 0.1, 0.2, ..., 1.0
val_reliability_p = [0.15, 0.35, 0.55, 0.75, 0.95]  # for validation set

n_train     = 3
n_val       = 6
n_test100   = 3
n_test_large = 2

base_dir     = 'data'
graphs_dir   = os.path.join(base_dir, 'graphs')
labels_dir   = os.path.join(base_dir, 'labels')

In [None]:
def make_split_dirs():
    for split in ['train', 'val', 'test100', 'test_large', 'test_10000']:
        os.makedirs(os.path.join(graphs_dir, split), exist_ok=True)
        os.makedirs(os.path.join(labels_dir, split), exist_ok=True)

make_split_dirs()

In [4]:
# --- Graph generation ---
def gen_graph(topo, size, s_param, rel_p, seed):
    random.seed(seed)
    if topo == 'ER':
        G = nx.erdos_renyi_graph(size, s_param, seed=seed)
    elif topo == 'BA':
        G = nx.barabasi_albert_graph(size, int(s_param), seed=seed)
    elif topo == 'SW':
        G = nx.watts_strogatz_graph(size, k=4, p=s_param, seed=seed)
    nx.set_edge_attributes(G, rel_p, 'p')
    # for u, v in G.edges():
    #     G[u][v]['p'] = rel_p

    return G

In [5]:
def save_graph(G, meta, idx, split):
    # save into corresponding split subfolder
    fname = f"{meta['topo']}_sz{meta['size']}_sp{meta['s_param']}_rp{meta['rel_p']}_{split}_{idx}.pkl"
    path = os.path.join(graphs_dir, split, fname)
    # ensure the directory exists
    dirpath = os.path.dirname(path)
    os.makedirs(dirpath, exist_ok=True)
    # if a directory with the same name exists, this will fail; remove or rename it first
    with open(path, 'wb') as f:
        pickle.dump({'graph': G, 'meta': meta}, f)

In [None]:
def generate_split(split, sizes, n_graphs, reliability_p):
    for topo, params in structural_params.items():
        key = list(params.keys())[0]

        for s_param in params[key]:
            for size in sizes:
                for rel_p in reliability_p:
                    for i in range(n_graphs):

                        # seed = hash((topo, s_param, size, rel_p, split, i)) & 0xffffffff
                        seed = 42
                        G = gen_graph(topo, size, s_param, rel_p, seed)
                        meta = {'topo': topo, 'size': size, 's_param': s_param, 'rel_p': rel_p}
                        save_graph(G, meta, i, split)


In [66]:
# generate_split('train',      train_sizes,   n_train, reliability_p)
# generate_split('val',        [100],         n_val, reliability_p=val_reliability_p)
# generate_split('test100',    [100],         n_test100, reliability_p)
generate_split('test_large', test_sizes[1:],n_test_large, reliability_p)

In [3]:
def nx_to_csr(G: nx.Graph) -> Tuple[List[int], Dict[int, int], np.ndarray, np.ndarray, np.ndarray]:
     """Convert an undirected NetworkX graph (edge attr `'p'`) to CSR arrays."""
     nodes: List[int] = list(G.nodes())
     idx_of: Dict[int, int] = {u: i for i, u in enumerate(nodes)}

     indptr: List[int] = [0]
     indices: List[int] = []
     probs: List[float] = []

     for u in nodes:
         for v in G.neighbors(u):
             indices.append(idx_of[v])
             probs.append(G.edges[u, v]['p'])
         indptr.append(len(indices))

     return (
         nodes,
         idx_of,
         np.asarray(indptr, dtype=np.int32),
         np.asarray(indices, dtype=np.int32),
         np.asarray(probs, dtype=np.float32),
     )

@njit(inline="always")
def _bfs_component_size(start: int,
                    indptr: np.ndarray,
                    indices: np.ndarray,
                    probs: np.ndarray,
                    deleted: np.ndarray) -> int:
    """Return |C_u|−1 for **one** random realisation (stack BFS)."""
    n = deleted.size
    stack = np.empty(n, dtype=np.int32)
    visited = np.zeros(n, dtype=np.uint8)

    size = 1
    top = 0
    stack[top] = start
    top += 1
    visited[start] = 1

    while top:
        top -= 1
        v = stack[top]
        for eid in range(indptr[v], indptr[v + 1]):
            w = indices[eid]
            if deleted[w]:
                continue
            if np.random.random() >= probs[eid]:
                continue
            if visited[w]:
                continue
            visited[w] = 1
            stack[top] = w
            top += 1
            size += 1
    return size - 1

@njit(parallel=True)
def epc_mc(indptr: np.ndarray,
            indices: np.ndarray,
            probs: np.ndarray,
            deleted: np.ndarray,
            num_samples: int) -> float:
    """Monte‑Carlo estimator of **expected pairwise connectivity** (EPC)."""
    surv = np.where(~deleted)[0]
    m = surv.size
    if m < 2:
        return 0.0

    acc = 0.0
    for _ in prange(num_samples):
        u = surv[np.random.randint(m)]
        acc += _bfs_component_size(u, indptr, indices, probs, deleted)

    return (m * acc) / (2.0 * num_samples)

def epc_mc_deleted(
  G: nx.Graph,
  S: set,
  num_samples: int = 100_000,
) -> float:
  # build csr once
  nodes, idx_of, indptr, indices, probs = nx_to_csr(G)
  n = len(nodes)

  # turn python set S into a mask (node-IDs to delete)
  deleted = np.zeros(n, dtype=np.bool_)
  for u in S:
    deleted[idx_of[u]] = True

  epc = epc_mc(indptr, indices, probs, deleted, num_samples)

  return epc

In [None]:
# --- Label generation  ---
def compute_labels(file_path, mc_samples=1_000):

    data = pickle.load(open(file_path, 'rb'))
    G_orig = data['graph']

    base = epc_mc_deleted(G_orig, set(), num_samples=mc_samples)
    
    n = G_orig.number_of_nodes()
    labels = torch.zeros(n)

    for v in G_orig.nodes():
        # print(v)
        # print(type(set(v)))
        drop = epc_mc_deleted(G_orig, {v}, num_samples=mc_samples)
        labels[v] = base - drop
        
    # stabilise scale
    labels = torch.log1p(labels.clamp(min=0))

    # save labels
    fname = os.path.basename(file_path).replace('.pkl', '_labels.pt')
    split = os.path.basename(os.path.dirname(file_path))
    save_path = os.path.join(labels_dir, split, fname)
    torch.save(labels, save_path)

In [47]:
def greedy_cndp_epc_celf(
    G: nx.Graph,
    K: int,
    *,
    num_samples: int = 20_000,
    reuse_csr: Tuple = None,
    return_trace: bool = False,
) -> Union[Set[int], Tuple[Set[int], List[float]]]:
    """Select **K** nodes that minimise EPC using CELF & Numba.

    Parameters
    ----------
    return_trace : bool, default *False*
        If *True*, also return a list `[σ(S₁), σ(S₂), …]` where `S_i` is the
        prefix after deleting *i* nodes.  Useful for plots.
    """

    # CSR cache --------------------------------------------------------
    if reuse_csr is None:
        nodes, idx_of, indptr, indices, probs = nx_to_csr(G)
    else:
        nodes, idx_of, indptr, indices, probs = reuse_csr
    n = len(nodes)

    deleted = np.zeros(n, dtype=np.bool_)
    current_sigma = epc_mc(indptr, indices, probs, deleted, num_samples)

    pq: List[Tuple[float, int, int]] = []  # (-gain, v, last_round)
    gains = np.empty(n, dtype=np.float32)

    for v in range(n):
        deleted[v] = True
        gains[v] = current_sigma - epc_mc(indptr, indices, probs, deleted, num_samples)
        deleted[v] = False
        heapq.heappush(pq, (-gains[v], v, 0))

    S: Set[int] = set()
    trace: List[float] = []
    round_ = 0

    trace.append(current_sigma)

    while len(S) < K and pq:
        neg_gain, v, last = heapq.heappop(pq)
        if last == round_:
            # gain up‑to‑date → accept
            S.add(nodes[v])
            deleted[v] = True
            current_sigma += neg_gain  # add neg (= subtract gain)
            round_ += 1
            if return_trace:
                trace.append(current_sigma)
        else:
            # recompute gain lazily
            deleted[v] = True
            new_gain = current_sigma - epc_mc(indptr, indices, probs, deleted, num_samples)
            deleted[v] = False
            heapq.heappush(pq, (-new_gain, v, round_))

    return (S, trace) if return_trace else S

In [8]:
def solve_lp_reaga_sparse(G: nx.Graph, pre_fixed: set, k: int):
    V = list(G.nodes())
    n = len(V)

    # variables: s_i  (i = 0…n-1)      x_ij (j = 0…m2-1)
    Pairs = [tuple(sorted(e)) for e in combinations(V, 2)]
    m2    = len(Pairs)
    Nvar  = n + m2
    s_idx = {v: i         for i, v in enumerate(V)}
    x_idx = {e: n + j     for j, e in enumerate(Pairs)}

    
    rows, cols, data = [], [], []
    rhs              = []

    def add_coef(r, c, val):
        rows.append(r); cols.append(c); data.append(val)

    r = 0 

    # budget 
    for i in range(n):
        add_coef(r, i, 1.0)
    rhs.append(k); r += 1

    # edge upper bounds  x_uv − s_u − s_v ≤ 1 − p_uv
    for (u, v) in G.edges():
        u, v   = sorted((u, v))
        puv    = G.edges[u, v]['p']
        add_coef(r, x_idx[(u, v)],  1.0)
        add_coef(r, s_idx[u],      -1.0)
        add_coef(r, s_idx[v],      -1.0)
        rhs.append(1 - puv); r += 1

    # triangle cuts for each real edge (i,j) and every
    for (i, j) in G.edges():
        i, j = sorted((i, j))
        for k_ in V:
            if k_ == i or k_ == j:
                continue
            add_coef(r, x_idx[tuple(sorted((i, k_)))],  1.0)  
            add_coef(r, x_idx[(i, j)]               , -1.0)  
            add_coef(r, x_idx[tuple(sorted((j, k_)))], -1.0)   
            rhs.append(0.0); r += 1

    n_rows = r
    A_ub   = coo_matrix((data, (rows, cols)), shape=(n_rows, Nvar)).tocsr()
    b_ub   = np.asarray(rhs)

    # bounds 
    bounds = [(0.0, 1.0)] * Nvar
    for v in pre_fixed:
        bounds[s_idx[v]] = (1.0, 1.0)

    #  objective 
    c = np.zeros(Nvar)
    for e in Pairs:
        c[x_idx[e]] = -1.0

    # 
    res = linprog(c, A_ub=A_ub, b_ub=b_ub,
                  bounds=bounds, method="highs")
    if not res.success:
        raise RuntimeError("LP infeasible: " + res.message)

    #
    s_vals = {v: res.x[s_idx[v]] for v in V}
    x_sum  = res.x[n:].sum()
    obj    = len(Pairs) - x_sum
    return s_vals, obj

def local_search_(
  G: nx.Graph,
  S_init: set,
  num_samples: int = 10_000
):
  """1-swap local search"""

  S = S_init.copy()
  nodes_not_in_set = set(G.nodes()) - S

  current_epc = epc_mc_deleted(G, S, num_samples)

  improved = True
  while improved:
    improved = False
    best_swap = None

    for u in list(S):
      for v in nodes_not_in_set:        
        
        D_new = (S - {u}) | {v}

        temp_epc = epc_mc_deleted(G, D_new, num_samples)

        if temp_epc < current_epc:
            current_epc = temp_epc
            best_swap = (u, v)
            improved = True

    if improved and best_swap:
      u, v = best_swap

      S.remove(u)
      S.add(v)
      nodes_not_in_set.remove(v)
      nodes_not_in_set.add(u)
  
  return S

def rega(G: nx.Graph,
        k: int,
        num_samples: int = 100_000,
        max_iter: int = 1,
        # epsilon: float = None,
        # delta: float = None,
        use_tqdm: bool = False):
    """
    Full REGA pipeline: LP‐rounding + CSP‐refined local swaps.
    """

    csr = nx_to_csr(G)

    # iterative rounding
    D = set()
    for _ in range(k):
      # s_vals, _ = solve_lp_(G, pre_fixed=D, k=k)
      s_vals, _ = solve_lp_reaga_sparse(G, pre_fixed=D, k=k)

      # pick the fractional s_i largest among V\D
      u = max((v for v in G.nodes() if v not in D),
              key=lambda v: s_vals[v])
      D.add(u)

    # local‐swap refinement

    S_opt = local_search_(G, D, num_samples)
    
    # S_opt = local_search_(G, greedy_es_S, num_samples)

    # S_opt = local_search_swap(
    #   D, csr=csr, num_samples=num_samples, max_iter=max_iter)
    
    # improved = True
    
    # while improved:

    #     improved = False
    #     best_epc = current_epc
    #     best_swap = None

    #     for u in list(D):
    #         for v in G.nodes():

    #             if v in D: 
    #                 continue

    #             D_new = (D - {u}) | {v}

    #             epc_val = epc_func(G, D_new,
    #                                num_samples=num_samples,
    #                             #    epsilon=epsilon,
    #                             #    delta=delta,
    #                             #    use_tqdm=use_tqdm
    #                                )
                
    #             if epc_val < best_epc:
    #                 best_epc = epc_val
    #                 best_swap = (u, v)

    #     if best_swap is not None:

    #         u, v = best_swap
    #         D.remove(u)
    #         D.add(v)
    #         current_epc = best_epc
    #         improved = True

    return S_opt

In [9]:
GRAPHS_ROOT  = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/graphs"     # expecting graphs/<split>/<type>/*.pkl
LABELS_ROOT  = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/rega_labels"     # will mirror the same structure

# ----- budget percentage -----
ALPHA        = 0.10         # 10 % of nodes

# ----- MC parameters -----
MC_SAMPLES   = 10_000       # inside greedy
MC_EPC_SAVE  = 20_000

In [10]:
def build_and_save_label(pkl_path: str, alpha: float = ALPHA):
    # --- load --------------------------------------------------------
    with open(pkl_path, "rb") as f:
        G = pickle.load(f)["graph"]

    N = G.number_of_nodes()
    K = max(1, math.ceil(alpha * N))

    # print(f"K: {K}")
    
    # --- greedy delete set ------------------------------------------
    delete_set = rega(G, K, num_samples=MC_SAMPLES)

    # --- binary mask -------------------------------------------------
    mask = torch.zeros(N, dtype=torch.float32)
    mask[list(delete_set)] = 1.0

    # --- save --------------------------------------------------------
    #  graphs/<split>/<type>/foo.pkl  ->  labels/<split>/<type>/foo_labels.pt
    rel_dir   = os.path.relpath(os.path.dirname(pkl_path), GRAPHS_ROOT)
    save_dir  = os.path.join(LABELS_ROOT, rel_dir)
    os.makedirs(save_dir, exist_ok=True)

    fname_out = os.path.basename(pkl_path).replace(".pkl", "_labels.pt")
    torch.save(mask, os.path.join(save_dir, fname_out))

In [11]:
# label_path = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/graphs"

all_graphs = glob.glob(os.path.join(GRAPHS_ROOT, '*', '*.pkl'))

for fp in tqdm(all_graphs, desc="building greedy labels"):
    build_and_save_label(fp, alpha=ALPHA)

print("✓ Finished.  All binary-mask labels written to", LABELS_ROOT)

building greedy labels: 100%|██████████| 900/900 [3:19:13<00:00, 13.28s/it]   

✓ Finished.  All binary-mask labels written to /home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/rega_labels





## Previous label generation code

In [63]:
import glob

all_graphs = glob.glob(os.path.join(graphs_dir, '*', '*.pkl'))

for fp in tqdm(all_graphs, total=len(all_graphs), desc="Computing labels"):
    compute_labels(fp, 10_000)

Computing labels:   0%|          | 0/1080 [00:00<?, ?it/s]


NameError: name 'compute_labels' is not defined

In [None]:
data = torch.load("path/to/your_file.pt", map_location="cpu")

# 2. See what you got:
print(type(data))
# e.g. <class 'dict'> (often a state_dict) or a ScriptModule

# 3. If it’s a dict of tensors (state_dict):
if isinstance(data, dict):
    for k, v in data.items():
        print(f"{k:40s} → {tuple(v.shape) if hasattr(v, 'shape') else type(v)}")

In [25]:
import os
import torch

base_folder = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/labels"         # Folder with .pt files
output_folder = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/labels_txt"   # Destination folder for .txt files

for root, _, files in os.walk(base_folder):
    for file in files:
        if file.endswith(".pt"):
            pt_path = os.path.join(root, file)
            
            # Load tensor or model (depending on format)
            try:
                content = torch.load(pt_path, map_location='cpu')
            except Exception as e:
                print(f"Skipping {pt_path} due to load error: {e}")
                continue
            
            # Generate corresponding txt path
            rel_path = os.path.relpath(pt_path, base_folder)
            txt_path = os.path.join(output_folder, os.path.splitext(rel_path)[0] + ".txt")
            os.makedirs(os.path.dirname(txt_path), exist_ok=True)

            # Write to text file
            try:
                with open(txt_path, "w") as f:
                    f.write(str(content))
            except Exception as e:
                print(f"Error writing {txt_path}: {e}")

# 2. Feature Engineering

In [26]:
def get_neigbors(g, node, depth):
    output = {}
    layers = dict(nx.bfs_successors(g, source=node, depth_limit=depth))
    nodes = [node]
    for i in range(1, depth + 1):
        output[i] = []
        for x in nodes:
            output[i].extend(layers.get(x, []))
        nodes = output[i]
    return output

In [27]:
def get_dgl_g_input(G):
    input = torch.ones(len(G), 11)
    for i in G.nodes():
        input[i, 0] = G.degree()[i]
        input[i, 1] = sum([G.degree()[j] for j in list(G.neighbors(i))]) / max(len(list(G.neighbors(i))), 1)
        input[i, 2] = sum([nx.clustering(G, j) for j in list(G.neighbors(i))]) / max(len(list(G.neighbors(i))), 1)
        egonet = G.subgraph(list(G.neighbors(i)) + [i])
        input[i, 3] = len(egonet.edges())
        input[i, 4] = sum([G.degree()[j] for j in egonet.nodes()]) - 2 * input[i, 3]

    for l in [1, 2, 3]:
        for i in G.nodes():
            ball = get_neigbors(G, i, l)
            input[i, 5 + l - 1] = (G.degree()[i] - 1) * sum([G.degree()[j] - 1 for j in ball[l]])

    v = nx.voterank(G)
    votescore = dict()
    
    for i in list(G.nodes()): votescore[i] = 0
    for i in range(len(v)):
        votescore[v[i]] = len(G) - i
    e = nx.eigenvector_centrality(G, max_iter=1000)
    k = nx.core_number(G)
    for i in G.nodes():
        input[i, 8] = votescore[i]
        input[i, 9] = e[i]
        input[i, 10] = k[i]
    for i in range(len(input[0])):
        if max(input[:, i]) != 0:
            input[:, i] = input[:, i] / max(input[:, i])
    return input

# 3. Traininig

In [3]:
def extract_node_features(G):
    """
    Compute per-node structural features for DGL input.
    Returns: torch.FloatTensor of shape [num_nodes, 11]
    Features:
      0: degree
      1: avg neighbor degree
      2: avg neighbor clustering coeff
      3: egonet edge count
      4: egonet sum-degree minus internal edges (volume)
      5-7: l-hop neighbor sum-degree offsets for l=1,2,3
      8: voterank score
      9: eigenvector centrality
     10: k-core number
    Normalized per feature by dividing by feature-wise max.
    """
    n = G.number_of_nodes()
    feats = torch.ones(n, 11)

    # precompute degrees and clustering
    deg = dict(G.degree())
    clust = nx.clustering(G)

    # voterank ordering and scoring
    order = nx.voterank(G)
    vote_score = {u: n - i for i, u in enumerate(order)}

    # eigenvector centrality
    eig = nx.eigenvector_centrality(G, max_iter=500)
    core = nx.core_number(G)

    # compute for each node
    for u in G.nodes():
        nbrs = list(G.neighbors(u))
        feats[u, 0] = deg[u]
        feats[u, 1] = sum(deg[v] for v in nbrs) / max(len(nbrs), 1)
        feats[u, 2] = sum(clust[v] for v in nbrs) / max(len(nbrs), 1)
        egonet = G.subgraph(nbrs + [u])
        feats[u, 3] = egonet.number_of_edges()
        feats[u, 4] = sum(deg[v] for v in egonet.nodes()) - 2 * feats[u, 3]
        # l-hop neighbor sums
        for l in (1,2,3):
            # BFS up to l hops
            visited = {u}
            queue = deque([(u, 0)])
            hop_nodes = set()
            while queue:
                v, d = queue.popleft()
                if d == l: continue
                for w in G.neighbors(v):
                    if w not in visited:
                        visited.add(w)
                        queue.append((w, d+1))
                        if d+1 == l:
                            hop_nodes.add(w)
            feats[u, 4 + l] = sum(deg[v] - 1 for v in hop_nodes)
        feats[u, 8] = vote_score.get(u, 0)
        feats[u, 9] = eig.get(u, 0)
        feats[u, 10] = core.get(u, 0)

    # normalize each feature dimension
    for i in range(feats.size(1)):
        col = feats[:, i]
        maxval = col.max()
        if maxval > 0:
            feats[:, i] = col / maxval
    return feats

In [4]:
# class SAGE2AttnModel_fanout(nn.Module):
#     def __init__(self, in_dim, hidden_dim=128, num_heads=2):
#         super().__init__()
#         self.layers = nn.ModuleList()
#         self.norms  = nn.ModuleList()

#         fanouts = [15, 10, 5]

#         for _ in fanouts:
#             self.layers.append(SAGEConv(in_dim, hidden_dim, 'lstm'))
#             self.norms.append(nn.LayerNorm(hidden_dim))
#             in_dim = hidden_dim
            
#         self.attn = nn.MultiheadAttention(hidden_dim, num_heads)
#         self.out  = nn.Linear(hidden_dim, 1)

#     def forward(self, g, x):
#         h = x
#         for sage, norm in zip(self.layers, self.norms):
#             h = F.relu(norm(sage(g, h)))
#         # attention works with seq_len x batch_size x hidden
#         # here nodes as sequence, batch=1
#         h2, _ = self.attn(h.unsqueeze(1), h.unsqueeze(1), h.unsqueeze(1))
#         h2 = h2.squeeze(1)
#         return self.out(h2).squeeze(-1)

class SAGE2AttnModel(nn.Module):
    """
    GraphSAGE + attention model using full-graph (no neighbor sampling).
    Three SAGEConv layers followed by multi-head self-attention and output head.
    """
    def __init__(self, in_dim, hidden_dim=128, num_heads=2, num_layers=3):
        super().__init__()
        self.layers = nn.ModuleList()
        self.norms  = nn.ModuleList()
        # Build fixed number of layers without explicit fan-out

        for _ in range(num_layers):
            self.layers.append(SAGEConv(in_dim, hidden_dim, 'lstm'))
            self.norms.append(nn.LayerNorm(hidden_dim))
            in_dim = hidden_dim

        # self-attention across all nodes

        self.attn = nn.MultiheadAttention(hidden_dim, num_heads)
        self.out  = nn.Linear(hidden_dim, 1)

    def forward(self, g, x):
        h = x
        # message-passing over full graph
        for sage, norm in zip(self.layers, self.norms):
            h = F.relu(norm(sage(g, h)))
        # apply self-attention: treat nodes as sequence length
        h2, _ = self.attn(h.unsqueeze(1), h.unsqueeze(1), h.unsqueeze(1))
        h2 = h2.squeeze(1)
        return self.out(h2).squeeze(-1)

In [4]:
from torch_geometric.data import Data, Dataset

class GraphEPCDataset(Dataset):
    def __init__(self, graphs_dir, labels_dir, split):
        self.graph_paths = glob.glob(os.path.join(graphs_dir, split, '*.pkl'))
        self.labels_dir  = os.path.join(labels_dir, split)

    def __len__(self):
        return len(self.graph_paths)

    def __getitem__(self, idx):
        # --------  load graph ----------
        path = self.graph_paths[idx]
        G_nx  = pickle.load(open(path, 'rb'))['graph']

        # node-level features
        x = extract_node_features(G_nx)          # [N, 11]

        # edge index & probabilities
        edges      = list(G_nx.edges())
        edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
        edge_index = torch.cat([edge_index, edge_index.flip(0)], dim=1)  # undirected
        p_list     = [G_nx[u][v]['p'] for u, v in edges]
        edge_prob  = torch.tensor(p_list + p_list, dtype=torch.float)

        # labels  (make sure they are float for MSELoss)
        lbl_name = os.path.basename(path).replace('.pkl', '_labels.pt')
        y        = torch.load(os.path.join(self.labels_dir, lbl_name)).float()

        # --------  wrap in Data ----------
        data = Data(x=x,
                    edge_index=edge_index,
                    edge_prob=edge_prob,
                    y=y)   
        
        data.file_name = os.path.basename(path)
        data.idx = torch.tensor(idx, dtype=torch.long)  # add index for reference

        return data

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.utils import softmax

class EdgeProbGATConv(MessagePassing):
    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 heads: int = 2,
                 negative_slope: float = 0.2,
                 dropout: float = 0.2,
                 concat: bool = True,
                 bias: bool = True):
        super().__init__(aggr='add', node_dim=0)  # standard GAT aggregation
        
        self.in_channels   = in_channels
        self.out_channels  = out_channels
        self.heads         = heads
        self.negative_slope= negative_slope
        self.dropout       = dropout
        self.concat        = concat

        # Linear projection for query/key/value
        self.lin = nn.Linear(in_channels, heads * out_channels, bias=False)
        # Attention weights aᵀ [Wh_i || Wh_j]
        self.att = nn.Parameter(torch.Tensor(1, heads, 2*out_channels + 1))
        # self.att = nn.Parameter(torch.Tensor(1, heads, 2*out_channels))

        if bias and concat:
            self.bias = nn.Parameter(torch.Tensor(heads * out_channels))
        elif bias and not concat:
            self.bias = nn.Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        nn.init.xavier_uniform_(self.lin.weight)
        nn.init.xavier_uniform_(self.att)
        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def forward(self,
                x: torch.Tensor,
                edge_index: torch.LongTensor,
                edge_prob: torch.Tensor):
        """
        x: [N, in_channels]
        edge_index: [2, E]
        edge_prob: [E]   (the p_ij for each edge in edge_index order)
        """
        N = x.size(0)
        # 1. Linearly project node features to multi-head space
        x = self.lin(x)                              # [N, heads*out]
        x = x.view(N, self.heads, self.out_channels) # [N, heads, out]

        # 2. Start propagation
        out = self.propagate(edge_index, x=x, edge_prob=edge_prob, size=(N, N))
        # out: [N, heads, out]

        # 3. Concat or average heads
        if self.concat:
            out = out.view(N, self.heads * self.out_channels)
        else:
            out = out.mean(dim=1)  # [N, out]

        if self.bias is not None:
            out = out + self.bias

        return out

    # def message(self,
    #             x_j: torch.Tensor,
    #             x_i: torch.Tensor,
    #             edge_prob: torch.Tensor,
    #             index: torch.LongTensor,
    #             ptr,
    #             size_i):
    #     """
    #     x_j, x_i: [E, heads, out_channels] (sender and receiver node reps)
    #     edge_prob: [E]           (scalar reliability)
    #     index:   [E]             (destination node indices)
    #     """
    #     # 1. compute standard attention logits: aᵀ [Wh_i || Wh_j]
    #     cat = torch.cat([x_i, x_j], dim=-1)           # [E, heads, 2*out]
    #     alpha = (cat * self.att).sum(dim=-1)          # [E, heads]

    #     # 2. add log(edge_prob)
    #     log_p = edge_prob.log().unsqueeze(-1)        # [E, 1]
    #     alpha = alpha + log_p                        # broadcasting to [E, heads]

    #     # 3. leaky‐relu + softmax over all incoming edges
    #     alpha = F.leaky_relu(alpha, self.negative_slope)
    #     alpha = softmax(alpha, index, ptr, size_i)    # [E, heads]

    #     # 4. dropout on attention weights
    #     alpha = F.dropout(alpha, p=self.dropout, training=self.training)

    #     # 5. scale messages
    #     return x_j * alpha.unsqueeze(-1)             # [E, heads, out]

    def message(self, x_j, x_i, edge_prob, index, ptr, size_i):
        # concat node reps and edge scalar
        edge_prob = edge_prob.view(-1, 1, 1)               # [E,1,1]
        cat = torch.cat([x_i, x_j, edge_prob.expand(-1, self.heads, 1)], dim=-1)
        # shape: [E, heads, 2*out+1]

        alpha = (cat * self.att).sum(dim=-1)               # [E, heads]
        alpha = F.leaky_relu(alpha, self.negative_slope)
        alpha = softmax(alpha, index, ptr, size_i)
        alpha = F.dropout(alpha, p=self.dropout, training=self.training)
        return x_j * alpha.unsqueeze(-1)

    def update(self, aggr_out):
        # aggr_out: [N, heads, out] if concat else [N, out]
        return aggr_out

In [6]:
class SAGEEdgeProbModel(nn.Module):
    def __init__(self, in_dim, hidden_dim=256, heads=4):
        super().__init__()

        self.conv1 = SAGEConv(in_dim,  hidden_dim, normalize=True)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, hidden_dim, normalize=True)
        self.bn2 = nn.BatchNorm1d(hidden_dim)
        self.conv3 = SAGEConv(hidden_dim, hidden_dim, normalize=True)
        self.bn3 = nn.BatchNorm1d(hidden_dim)

        # now our custom GAT that adds log(p_ij)
        self.gat_edge = EdgeProbGATConv(hidden_dim, hidden_dim, 
                                        heads=heads, dropout=0.3)
        
        self.out       = nn.Linear(heads * hidden_dim, 1)  # if concat=True

    def forward(self, x, edge_index, edge_prob):
        # x: [N, in_dim], edge_prob: [E]
        h = F.relu(self.bn1(self.conv1(x, edge_index)))
        h = F.relu(self.bn2(self.conv2(h, edge_index))) + h
        h = F.relu(self.bn3(self.conv3(h, edge_index))) + h

        # incorporate per-edge probabilities
        h = self.gat_edge(h, edge_index, edge_prob)  # [N, heads*out]

        return self.out(h).squeeze(-1)               # [N]

## Regression

In [None]:
from torch_geometric.loader import DataLoader 

def train_model():
    SEED = 42
    torch.manual_seed(SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(SEED)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    in_dim = 11                   # keep your original setting
    model  = SAGEEdgeProbModel(in_dim).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
    loss_fn  = nn.MSELoss()

    train_ds = GraphEPCDataset(graphs_dir, labels_dir, 'train')
    val_ds   = GraphEPCDataset(graphs_dir, labels_dir, 'val')

    train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
    val_loader   = DataLoader(val_ds,   batch_size=32)

    best_val = float('inf')
    for epoch in range(1, 31):
        # -------------------- training --------------------
        model.train()
        total_loss = 0.0
        for data in train_loader:                   # data is a Batch
            data = data.to(device)                  
            preds = model(data.x, data.edge_index, data.edge_prob)
            loss  = loss_fn(preds, data.y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_loader)

        # -------------------- validation ------------------
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data in val_loader:
                data = data.to(device)
                val_loss += loss_fn(
                    model(data.x, data.edge_index, data.edge_prob),
                    data.y
                ).item()
        val_loss /= len(val_loader)

        print(f"Epoch {epoch:02d}: Train={avg_loss:.4f} | Val={val_loss:.4f}")

        if val_loss < best_val:
            best_val = val_loss
            torch.save(model.state_dict(),
                       os.path.join(base_dir, 'best_model.pt'))

## Binary

In [55]:
GRAPHS_DIR = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/graphs"
LABELS_DIR = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/graphs_labels"
LABELS_OLD_DIR = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/labels"

In [56]:
BATCH = 256
EPOCHS = 50

In [57]:
class FocalLoss(nn.Module):
    def __init__(self, gamma: float = 2.0, alpha: float | None = None):
        super().__init__()
        self.gamma = gamma
        # optional α-balancing (same role as pos_weight)
        self.alpha = alpha            # scalar ∈ (0,1) or None

    def forward(self, logits, targets):
        # logits: [N], targets: 0/1 floats
        prob = torch.sigmoid(logits)
        pt   = prob * targets + (1 - prob) * (1 - targets)   # p_t
        focal = (1 - pt) ** self.gamma
        logp  = F.binary_cross_entropy_with_logits(
                    logits, targets, reduction='none')
        if self.alpha is not None:
            α_t = self.alpha * targets + (1 - self.alpha) * (1 - targets)
            logp = α_t * logp
        return (focal * logp).mean()


In [58]:
from torch_geometric.loader import DataLoader as PyGDataLoader
from torchmetrics.classification import BinaryAUROC
import torch.nn as nn, torch

base_dir     = 'data'

def train_model():
    SEED = 42
    torch.manual_seed(SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(SEED)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # ---------------- model ----------------
    model = SAGEEdgeProbModel(in_dim=11).to(device)
    optimizer  = torch.optim.AdamW(model.parameters(), 
                                   lr=1e-3, weight_decay=1e-4)
    # scheduler  = torch.optim.lr_scheduler.ReduceLROnPlateau(
    #                optimizer, mode='min', factor=0.5, patience=4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=4, min_lr=1e-5)
    
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    #           optimizer, T_0=10, T_mult=2)

    pos_weight = torch.tensor(9.0, device=device)
    # loss_fn = nn.MSELoss()  # for regression task
    loss_fn    = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    # loss_fn = FocalLoss(gamma=2.0, alpha=0.10).to(device)

    # ---------------- data -----------------
    train_ds = GraphEPCDataset(GRAPHS_DIR, LABELS_DIR, 'train')
    val_ds   = GraphEPCDataset(GRAPHS_DIR, LABELS_DIR, 'val')

    train_loader = PyGDataLoader(train_ds, batch_size=BATCH, shuffle=True)
    val_loader   = PyGDataLoader(val_ds,   batch_size=BATCH)

    best_val = float('inf')
    for epoch in range(1, EPOCHS + 1):
        # ---------- training ----------
        model.train()
        total_loss = 0.0
        for batch in train_loader:
            batch = batch.to(device)
            logits = model(batch.x, batch.edge_index, batch.edge_prob)
            loss   = loss_fn(logits, batch.y)

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)

        # scheduler.step()
        # ---------- validation ----------
        model.eval()
        val_loss = 0.0
        auroc = BinaryAUROC().to(device)

        with torch.no_grad():
            for batch in val_loader:
                batch = batch.to(device)
                logits = model(batch.x, batch.edge_index, batch.edge_prob)
                auroc.update(logits, batch.y.int())
                val_loss += loss_fn(logits, batch.y).item()
        val_loss /= len(val_loader)
        scheduler.step(val_loss)

        print(f"Epoch {epoch:02d} | train {avg_loss:.4f} "
              f"| val {val_loss:.4f} | AUROC {auroc.compute():.4f}")

        if val_loss < best_val:
            best_val = val_loss
            torch.save(model.state_dict(),
                       os.path.join(base_dir, 'best_model.pt'))

In [59]:
train_model()

Epoch 01 | train 1.3924 | val 1.2341 | AUROC 0.8254
Epoch 02 | train 1.3658 | val 1.2441 | AUROC 0.8197
Epoch 03 | train 1.5095 | val 1.2164 | AUROC 0.8079
Epoch 04 | train 1.0844 | val 1.2181 | AUROC 0.8481
Epoch 05 | train 0.9476 | val 1.2262 | AUROC 0.8237
Epoch 06 | train 0.8391 | val 1.2064 | AUROC 0.8447
Epoch 07 | train 0.8221 | val 1.2100 | AUROC 0.8608
Epoch 08 | train 0.7744 | val 1.2022 | AUROC 0.8467
Epoch 09 | train 0.7763 | val 1.2063 | AUROC 0.8724
Epoch 10 | train 0.7343 | val 1.2113 | AUROC 0.8669
Epoch 11 | train 0.7883 | val 1.2202 | AUROC 0.8662
Epoch 12 | train 0.7593 | val 1.1398 | AUROC 0.8875
Epoch 13 | train 0.7104 | val 1.2218 | AUROC 0.8654
Epoch 14 | train 0.6668 | val 1.1726 | AUROC 0.8761
Epoch 15 | train 0.6537 | val 1.1232 | AUROC 0.8803
Epoch 16 | train 0.6384 | val 1.0037 | AUROC 0.8878
Epoch 17 | train 0.6967 | val 1.0711 | AUROC 0.8941
Epoch 18 | train 0.6778 | val 0.9484 | AUROC 0.8964
Epoch 19 | train 0.6364 | val 0.8916 | AUROC 0.9034
Epoch 20 | t

In [62]:
from torch_geometric.loader import DataLoader

ROOT = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data"

graphs_dir   = f"{ROOT}/graphs/test_100_separate"          # same dirs you used for train/val
labels_dir   = f"{ROOT}/graphs_labels/test_100_separate"          # not needed for inference but Dataset expects it
ckpt_path    = f"{ROOT}/best_model_bse_50.pt" # saved in train_model()
K            = 10                     # number of nodes to delete
mc_samples   = 100_000                 # per-graph Monte-Carlo samples for EPC
device       = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ------------------------------------------------------------
# 1.  DATASET & DATALOADER  (batch_size = 1 for clarity)
# ------------------------------------------------------------
test_ds      = GraphEPCDataset(graphs_dir, labels_dir, split="ER")
test_loader  = DataLoader(test_ds, batch_size=1, shuffle=False)

# ------------------------------------------------------------
# 2.  LOAD MODEL
# ------------------------------------------------------------
in_dim       = 11                     # you decided to keep only the 11 node features
model        = SAGEEdgeProbModel(in_dim).to(device)
model.load_state_dict(torch.load(ckpt_path, map_location=device))
model.eval()

# ------------------------------------------------------------
# 3.  INFERENCE + EPC
# ------------------------------------------------------------
from tqdm import tqdm

all_epc = []
i = 0

for data in tqdm(test_loader, desc="Inference"):

    # if i == 30:
    #     break
    # move everything to GPU/CPU
    data = data.to(device)

    fname = data.file_name[0]
    
    if "sp0.0443" in fname and ("rp0.9" in fname or "rp1.0" in fname):
        # ---- 3.1 node scores ----
        print("file: ", data.file_name)
        with torch.no_grad():
            scores = model(data.x, data.edge_index, data.edge_prob)   # [N]
        
        # print(data.edge_prob)
        # ---- 3.2 pick top-K nodes ----
        # scores is already on the same device; .cpu() only if epc_mc_deleted needs CPU tensors
        topk = scores.topk(K, largest=True).indices.tolist()         # list[int]

        # ---- 3.3 compute EPC after deleting top-K ----
        #   We need the *NetworkX graph*; fetch it via the original .pkl
        #   The path is stored in test_ds.graph_paths[index] where `index`
        #   is the position in the dataset.  The DataLoader gives us that
        #   index in data.__dict__['idx']  (PyG attaches it automatically).
        idx  = data.idx.item()         # scalar tensor → int
        G_nx = pickle.load(open(test_ds.graph_paths[idx], 'rb'))['graph']
        
        epc_0   = epc_mc_deleted(G_nx.copy(), set(), num_samples=mc_samples)  
        epc_del = epc_mc_deleted(G_nx.copy(), set(topk), num_samples=mc_samples)

        all_epc.append(epc_del)

        delta   = epc_del - epc_0     # negative  ⇒ improvement
        print(f"{fname}  EPC₀={epc_0:.1f}  after={epc_del:.1f}  Δ={delta:+.1f}")

        print(f"Graph {idx:03d} | EPC(after delete) = {epc_del:.4f} | top-K = {topk}")
        
        print("top-scores id,logit:")
        print(sorted(zip(topk, scores[topk].tolist()), key=lambda x: -x[1])[:5])

        # confirm they exist in the NetworkX graph
        assert all(v in G_nx for v in topk)
        i += 1

# ------------------------------------------------------------
# 4.  SUMMARY
# ------------------------------------------------------------
import numpy as np
print(f"\nAverage EPC over {len(all_epc)} test graphs: {np.mean(all_epc):.4f}")

Inference:   0%|          | 0/60 [00:00<?, ?it/s]

Inference:  10%|█         | 6/60 [00:00<00:00, 55.78it/s]

file:  ['ER_sz100_sp0.0443_rp1.0_test100_2.pkl']


Inference:  32%|███▏      | 19/60 [00:00<00:01, 35.53it/s]

ER_sz100_sp0.0443_rp1.0_test100_2.pkl  EPC₀=4751.1  after=3652.9  Δ=-1098.2
Graph 009 | EPC(after delete) = 3652.9092 | top-K = [87, 27, 25, 1, 49, 60, 92, 94, 99, 57]
top-scores id,logit:
[(87, 5.062656402587891), (27, 2.9162180423736572), (25, 2.57100510597229), (1, 2.172837495803833), (49, 2.0008339881896973)]
file:  ['ER_sz100_sp0.0443_rp0.9_test100_1.pkl']


Inference:  50%|█████     | 30/60 [00:01<00:01, 28.15it/s]

ER_sz100_sp0.0443_rp0.9_test100_1.pkl  EPC₀=4815.7  after=3651.8  Δ=-1163.8
Graph 020 | EPC(after delete) = 3651.8450 | top-K = [38, 23, 89, 98, 76, 70, 34, 29, 30, 55]
top-scores id,logit:
[(38, 2.3899905681610107), (23, 2.260484457015991), (89, 2.113187313079834), (98, 1.8053447008132935), (76, 1.6229392290115356)]


Inference:  57%|█████▋    | 34/60 [00:01<00:00, 27.45it/s]

file:  ['ER_sz100_sp0.0443_rp0.9_test100_0.pkl']
ER_sz100_sp0.0443_rp0.9_test100_0.pkl  EPC₀=4889.8  after=3558.0  Δ=-1331.8
Graph 033 | EPC(after delete) = 3558.0082 | top-K = [75, 17, 25, 53, 74, 50, 57, 66, 71, 44]
top-scores id,logit:
[(75, 3.572188138961792), (17, 3.3117687702178955), (25, 3.134037494659424), (53, 2.73591685295105), (74, 2.701092481613159)]
file:  ['ER_sz100_sp0.0443_rp0.9_test100_2.pkl']


Inference:  65%|██████▌   | 39/60 [00:01<00:00, 26.32it/s]

ER_sz100_sp0.0443_rp0.9_test100_2.pkl  EPC₀=4801.1  after=3487.3  Δ=-1313.8
Graph 038 | EPC(after delete) = 3487.3290 | top-K = [82, 90, 4, 59, 95, 93, 28, 46, 44, 17]
top-scores id,logit:
[(82, 2.9729554653167725), (90, 2.7378523349761963), (4, 2.167814016342163), (59, 2.1636431217193604), (95, 2.11735463142395)]
file:  ['ER_sz100_sp0.0443_rp1.0_test100_1.pkl']
ER_sz100_sp0.0443_rp1.0_test100_1.pkl  EPC₀=4950.0  after=3827.2  Δ=-1122.8
Graph 043 | EPC(after delete) = 3827.1865 | top-K = [17, 29, 8, 26, 13, 81, 47, 2, 74, 7]
top-scores id,logit:
[(17, 4.122374534606934), (29, 3.8109734058380127), (8, 2.849382162094116), (26, 2.6491904258728027), (13, 2.555166482925415)]


Inference:  78%|███████▊  | 47/60 [00:01<00:00, 24.35it/s]

file:  ['ER_sz100_sp0.0443_rp1.0_test100_0.pkl']
ER_sz100_sp0.0443_rp1.0_test100_0.pkl  EPC₀=4750.2  after=3652.9  Δ=-1097.3
Graph 045 | EPC(after delete) = 3652.8750 | top-K = [18, 63, 57, 5, 54, 23, 84, 22, 33, 30]
top-scores id,logit:
[(18, 2.717495918273926), (63, 2.287719488143921), (57, 2.092172861099243), (5, 1.7826387882232666), (54, 1.7607983350753784)]


Inference: 100%|██████████| 60/60 [00:02<00:00, 29.57it/s]


Average EPC over 6 test graphs: 3638.3588





In [52]:
from torch_geometric.loader import DataLoader

graphs_dir   = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/graphs/test_large_separate"          # same dirs you used for train/val
labels_dir   = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/labels/test_large_separate"          # not needed for inference but Dataset expects it
ckpt_path    = "/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/best_model.pt" # saved in train_model()
K            = 10                     # number of nodes to delete
mc_samples   = 10_000                 # per-graph Monte-Carlo samples for EPC
device       = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ------------------------------------------------------------
# 1.  DATASET & DATALOADER  (batch_size = 1 for clarity)
# ------------------------------------------------------------
test_ds      = GraphEPCDataset(graphs_dir, labels_dir, split="ER")
test_loader  = DataLoader(test_ds, batch_size=1, shuffle=False)

# ------------------------------------------------------------
# 2.  LOAD MODEL
# ------------------------------------------------------------
in_dim       = 11                     # you decided to keep only the 11 node features
model        = SAGEEdgeProbModel(in_dim).to(device)
model.load_state_dict(torch.load(ckpt_path, map_location=device))
model.eval()

# ------------------------------------------------------------
# 3.  INFERENCE + EPC
# ------------------------------------------------------------
from tqdm import tqdm

all_epc = []
i = 0

for data in tqdm(test_loader, desc="Inference"):

    # if i == 30:
    #     break
    # move everything to GPU/CPU
    data = data.to(device)

    fname = data.file_name[0]
    
    if "sp0.0443" in fname and ("rp0.9" in fname or "rp1.0" in fname):
        # ---- 3.1 node scores ----
        print("file: ", data.file_name)
        with torch.no_grad():
            scores = model(data.x, data.edge_index, data.edge_prob)   # [N]
        
        # print(data.edge_prob)
        # ---- 3.2 pick top-K nodes ----
        # scores is already on the same device; .cpu() only if epc_mc_deleted needs CPU tensors
        topk = scores.topk(K, largest=True).indices.tolist()         # list[int]

        # ---- 3.3 compute EPC after deleting top-K ----
        #   We need the *NetworkX graph*; fetch it via the original .pkl
        #   The path is stored in test_ds.graph_paths[index] where `index`
        #   is the position in the dataset.  The DataLoader gives us that
        #   index in data.__dict__['idx']  (PyG attaches it automatically).
        idx  = data.idx.item()         # scalar tensor → int
        G_nx = pickle.load(open(test_ds.graph_paths[idx], 'rb'))['graph']
        
        epc_0   = epc_mc_deleted(G_nx.copy(), set(), num_samples=mc_samples)  
        epc_del = epc_mc_deleted(G_nx.copy(), set(topk), num_samples=mc_samples)

        all_epc.append(epc_del)

        delta   = epc_del - epc_0     # negative  ⇒ improvement
        print(f"{fname}  EPC₀={epc_0:.1f}  after={epc_del:.1f}  Δ={delta:+.1f}")

        print(f"Graph {idx:03d} | EPC(after delete) = {epc_del:.4f} | top-K = {topk}")
        
        print("top-scores id,logit:")
        print(sorted(zip(topk, scores[topk].tolist()), key=lambda x: -x[1])[:5])

        # confirm they exist in the NetworkX graph
        assert all(v in G_nx for v in topk)
        i += 1

# ------------------------------------------------------------
# 4.  SUMMARY
# ------------------------------------------------------------
import numpy as np
print(f"\nAverage EPC over {len(all_epc)} test graphs: {np.mean(all_epc):.4f}")

FileNotFoundError: [Errno 2] No such file or directory: '/home/tuguldurb/Development/Research/SCNDP/src/SCNDP/src/extension/learning/notebooks/gnn/data/best_model.pt'

In [39]:
# 0. CONFIGURATION
# ----------------------------------------------------------------------
graphs_dir  = "data/graphs/test_100/ER"
labels_dir  = "data/labels/test_100/ER"          # same sub-folder structure
K           = 10                            # how many nodes to delete
mc_samples  = 10_000                        # EPC Monte-Carlo samples

# ----------------------------------------------------------------------
# 1. HELPER: load graph + label file
# ----------------------------------------------------------------------
def load_graph_and_scores(pkl_path):
    G   = pickle.load(open(pkl_path, "rb"))["graph"]

    # label file has the same stem plus '_labels.pt'
    lbl_path = os.path.join(
        labels_dir,
        os.path.basename(pkl_path).replace(".pkl", "_labels.pt")
    )
    if not os.path.exists(lbl_path):
        raise FileNotFoundError(f"label file not found for {pkl_path}")

    # tensor shape [N], dtype=float
    log1_scores = torch.load(lbl_path)
    # undo stabilisation: score = exp(label) - 1
    scores = log1_scores.exp() - 1.0

    return G, scores

# ----------------------------------------------------------------------
# 2. MAIN LOOP
# ----------------------------------------------------------------------
for pkl in tqdm(sorted(glob.glob(os.path.join(graphs_dir, "*.pkl"))),
                desc="sanity"):

    G, scores = load_graph_and_scores(pkl)
    N         = G.number_of_nodes()

    # baseline EPC (no deletions)
    epc_0 = epc_mc_deleted(G.copy(), set(), num_samples=mc_samples)

    # top-K indices by descending score
    topk   = scores.topk(K).indices.tolist()       # list[int]
    epc_K  = epc_mc_deleted(G.copy(), set(topk), num_samples=mc_samples)

    print(f"{os.path.basename(pkl):<25}"
          f" EPC0={epc_0:7.4f}  "
          f"EPC-del={epc_K:7.4f}  "
          f"Δ={epc_K-epc_0:+.4f}  "
          f"K={K}")

print("done.")

sanity:   7%|▋         | 4/60 [00:00<00:01, 34.31it/s]

ER_sz100_sp0.0443_rp0.1_test100_0.pkl EPC0=39.5800  EPC-del=24.3855  Δ=-15.1945  K=10
ER_sz100_sp0.0443_rp0.1_test100_1.pkl EPC0=33.5550  EPC-del=20.0340  Δ=-13.5210  K=10
ER_sz100_sp0.0443_rp0.1_test100_2.pkl EPC0=36.4950  EPC-del=21.7890  Δ=-14.7060  K=10
ER_sz100_sp0.0443_rp0.2_test100_0.pkl EPC0=206.7300  EPC-del=74.4525  Δ=-132.2775  K=10
ER_sz100_sp0.0443_rp0.2_test100_1.pkl EPC0=225.4850  EPC-del=63.9765  Δ=-161.5085  K=10
ER_sz100_sp0.0443_rp0.2_test100_2.pkl EPC0=223.3700  EPC-del=71.1315  Δ=-152.2385  K=10
ER_sz100_sp0.0443_rp0.3_test100_0.pkl EPC0=1069.6250  EPC-del=201.0375  Δ=-868.5875  K=10


sanity:  20%|██        | 12/60 [00:00<00:01, 31.08it/s]

ER_sz100_sp0.0443_rp0.3_test100_1.pkl EPC0=1015.0900  EPC-del=188.9190  Δ=-826.1710  K=10
ER_sz100_sp0.0443_rp0.3_test100_2.pkl EPC0=868.6600  EPC-del=158.2245  Δ=-710.4355  K=10
ER_sz100_sp0.0443_rp0.4_test100_0.pkl EPC0=2556.6050  EPC-del=895.8645  Δ=-1660.7405  K=10
ER_sz100_sp0.0443_rp0.4_test100_1.pkl EPC0=2758.2750  EPC-del=723.4830  Δ=-2034.7920  K=10
ER_sz100_sp0.0443_rp0.4_test100_2.pkl EPC0=3103.0700  EPC-del=1142.8155  Δ=-1960.2545  K=10
ER_sz100_sp0.0443_rp0.5_test100_0.pkl EPC0=3225.4200  EPC-del=1195.1460  Δ=-2030.2740  K=10
ER_sz100_sp0.0443_rp0.5_test100_1.pkl EPC0=2999.6550  EPC-del=1153.9350  Δ=-1845.7200  K=10


sanity:  33%|███▎      | 20/60 [00:00<00:01, 30.12it/s]

ER_sz100_sp0.0443_rp0.5_test100_2.pkl EPC0=3702.0400  EPC-del=2174.4630  Δ=-1527.5770  K=10
ER_sz100_sp0.0443_rp0.6_test100_0.pkl EPC0=4439.0500  EPC-del=3007.7190  Δ=-1431.3310  K=10
ER_sz100_sp0.0443_rp0.6_test100_1.pkl EPC0=3891.4600  EPC-del=2129.1075  Δ=-1762.3525  K=10
ER_sz100_sp0.0443_rp0.6_test100_2.pkl EPC0=3904.3700  EPC-del=2063.5515  Δ=-1840.8185  K=10
ER_sz100_sp0.0443_rp0.7_test100_0.pkl EPC0=4354.1150  EPC-del=2876.7780  Δ=-1477.3370  K=10
ER_sz100_sp0.0443_rp0.7_test100_1.pkl EPC0=4180.1350  EPC-del=2393.1495  Δ=-1786.9855  K=10


sanity:  40%|████      | 24/60 [00:00<00:01, 30.21it/s]

ER_sz100_sp0.0443_rp0.7_test100_2.pkl EPC0=4177.4600  EPC-del=2403.7515  Δ=-1773.7085  K=10
ER_sz100_sp0.0443_rp0.8_test100_0.pkl EPC0=4775.0400  EPC-del=3512.2545  Δ=-1262.7855  K=10
ER_sz100_sp0.0443_rp0.8_test100_1.pkl EPC0=4638.9150  EPC-del=3093.4260  Δ=-1545.4890  K=10
ER_sz100_sp0.0443_rp0.8_test100_2.pkl EPC0=4552.2600  EPC-del=2915.7570  Δ=-1636.5030  K=10
ER_sz100_sp0.0443_rp0.9_test100_0.pkl EPC0=4895.4800  EPC-del=3300.3405  Δ=-1595.1395  K=10
ER_sz100_sp0.0443_rp0.9_test100_1.pkl EPC0=4818.5200  EPC-del=3452.0445  Δ=-1366.4755  K=10


sanity:  52%|█████▏    | 31/60 [00:01<00:01, 27.34it/s]

ER_sz100_sp0.0443_rp0.9_test100_2.pkl EPC0=4801.9200  EPC-del=3301.5600  Δ=-1500.3600  K=10
ER_sz100_sp0.0443_rp1.0_test100_0.pkl EPC0=4753.0000  EPC-del=3732.6150  Δ=-1020.3850  K=10
ER_sz100_sp0.0443_rp1.0_test100_1.pkl EPC0=4950.0000  EPC-del=3734.1630  Δ=-1215.8370  K=10
ER_sz100_sp0.0443_rp1.0_test100_2.pkl EPC0=4762.6400  EPC-del=3469.4370  Δ=-1293.2030  K=10
ER_sz100_sp0.0667_rp0.1_test100_0.pkl EPC0=88.1100  EPC-del=48.2355  Δ=-39.8745  K=10
ER_sz100_sp0.0667_rp0.1_test100_1.pkl EPC0=90.0400  EPC-del=46.6920  Δ=-43.3480  K=10


sanity:  65%|██████▌   | 39/60 [00:01<00:00, 30.30it/s]

ER_sz100_sp0.0667_rp0.1_test100_2.pkl EPC0=83.4400  EPC-del=44.3160  Δ=-39.1240  K=10
ER_sz100_sp0.0667_rp0.2_test100_0.pkl EPC0=818.1300  EPC-del=235.2825  Δ=-582.8475  K=10
ER_sz100_sp0.0667_rp0.2_test100_1.pkl EPC0=891.2250  EPC-del=266.9040  Δ=-624.3210  K=10
ER_sz100_sp0.0667_rp0.2_test100_2.pkl EPC0=930.5250  EPC-del=252.3150  Δ=-678.2100  K=10
ER_sz100_sp0.0667_rp0.3_test100_0.pkl EPC0=2964.3100  EPC-del=1183.6575  Δ=-1780.6525  K=10
ER_sz100_sp0.0667_rp0.3_test100_1.pkl EPC0=2410.0000  EPC-del=663.5295  Δ=-1746.4705  K=10
ER_sz100_sp0.0667_rp0.3_test100_2.pkl EPC0=3247.7200  EPC-del=1636.1055  Δ=-1611.6145  K=10


sanity:  72%|███████▏  | 43/60 [00:01<00:00, 27.68it/s]

ER_sz100_sp0.0667_rp0.4_test100_0.pkl EPC0=4154.5450  EPC-del=2779.1865  Δ=-1375.3585  K=10
ER_sz100_sp0.0667_rp0.4_test100_1.pkl EPC0=3980.9750  EPC-del=2569.8780  Δ=-1411.0970  K=10
ER_sz100_sp0.0667_rp0.4_test100_2.pkl EPC0=4190.5700  EPC-del=2892.8925  Δ=-1297.6775  K=10
ER_sz100_sp0.0667_rp0.5_test100_0.pkl EPC0=4563.9500  EPC-del=3225.7170  Δ=-1338.2330  K=10
ER_sz100_sp0.0667_rp0.5_test100_1.pkl EPC0=4516.0250  EPC-del=3048.5700  Δ=-1467.4550  K=10


sanity:  82%|████████▏ | 49/60 [00:01<00:00, 25.43it/s]

ER_sz100_sp0.0667_rp0.5_test100_2.pkl EPC0=4620.4350  EPC-del=3324.2175  Δ=-1296.2175  K=10
ER_sz100_sp0.0667_rp0.6_test100_0.pkl EPC0=4694.0150  EPC-del=3240.7605  Δ=-1453.2545  K=10
ER_sz100_sp0.0667_rp0.6_test100_1.pkl EPC0=4712.3650  EPC-del=3333.5055  Δ=-1378.8595  K=10
ER_sz100_sp0.0667_rp0.6_test100_2.pkl EPC0=4796.9400  EPC-del=3588.8085  Δ=-1208.1315  K=10
ER_sz100_sp0.0667_rp0.7_test100_0.pkl EPC0=4847.3450  EPC-del=3584.6640  Δ=-1262.6810  K=10


sanity:  87%|████████▋ | 52/60 [00:01<00:00, 24.83it/s]

ER_sz100_sp0.0667_rp0.7_test100_1.pkl EPC0=4886.2050  EPC-del=3756.9150  Δ=-1129.2900  K=10
ER_sz100_sp0.0667_rp0.7_test100_2.pkl EPC0=4821.6300  EPC-del=3510.3735  Δ=-1311.2565  K=10
ER_sz100_sp0.0667_rp0.8_test100_0.pkl EPC0=4897.6350  EPC-del=3646.3185  Δ=-1251.3165  K=10
ER_sz100_sp0.0667_rp0.8_test100_1.pkl EPC0=4939.0500  EPC-del=3774.8520  Δ=-1164.1980  K=10
ER_sz100_sp0.0667_rp0.8_test100_2.pkl EPC0=4917.1750  EPC-del=3633.2460  Δ=-1283.9290  K=10


sanity:  97%|█████████▋| 58/60 [00:02<00:00, 23.84it/s]

ER_sz100_sp0.0667_rp0.9_test100_0.pkl EPC0=4927.1100  EPC-del=3651.8580  Δ=-1275.2520  K=10
ER_sz100_sp0.0667_rp0.9_test100_1.pkl EPC0=4947.4750  EPC-del=3736.6740  Δ=-1210.8010  K=10
ER_sz100_sp0.0667_rp0.9_test100_2.pkl EPC0=4927.9550  EPC-del=3400.4925  Δ=-1527.4625  K=10
ER_sz100_sp0.0667_rp1.0_test100_0.pkl EPC0=4950.0000  EPC-del=4005.0000  Δ=-945.0000  K=10
ER_sz100_sp0.0667_rp1.0_test100_1.pkl EPC0=4950.0000  EPC-del=3912.4800  Δ=-1037.5200  K=10


sanity: 100%|██████████| 60/60 [00:02<00:00, 27.23it/s]

ER_sz100_sp0.0667_rp1.0_test100_2.pkl EPC0=4851.9800  EPC-del=3733.3890  Δ=-1118.5910  K=10
done.





In [4]:
# 0. CONFIGURATION
# ----------------------------------------------------------------------
graphs_dir  = "data/graphs/test_100"
labels_dir  = "data/rega_labels/test_100"          # same sub-folder structure
K           = 10                            # how many nodes to delete
mc_samples  = 10_000                        # EPC Monte-Carlo samples

# ----------------------------------------------------------------------
# 1. HELPER: load graph + label file
# ----------------------------------------------------------------------
def load_graph_and_scores(pkl_path):
    G   = pickle.load(open(pkl_path, "rb"))["graph"]

    # label file has the same stem plus '_labels.pt'
    lbl_path = os.path.join(
        labels_dir,
        os.path.basename(pkl_path).replace(".pkl", "_labels.pt")
    )
    if not os.path.exists(lbl_path):
        raise FileNotFoundError(f"label file not found for {pkl_path}")

    # tensor shape [N], dtype=float
    scores = torch.load(lbl_path)
    # undo stabilisation: score = exp(label) - 1
    # scores = log1_scores.exp() - 1.0

    return G, scores

# ----------------------------------------------------------------------
# 2. MAIN LOOP
# ----------------------------------------------------------------------
for pkl in tqdm(sorted(glob.glob(os.path.join(graphs_dir, "*.pkl"))),
                desc="sanity"):

    G, scores = load_graph_and_scores(pkl)
    N         = G.number_of_nodes()

    # baseline EPC (no deletions)
    epc_0 = epc_mc_deleted(G.copy(), set(), num_samples=mc_samples)

    # top-K indices by descending score
    # print(scores)
    topk   = scores.topk(K).indices.tolist()       # list[int]
    epc_K  = epc_mc_deleted(G.copy(), set(topk), num_samples=mc_samples)

    print(f"{os.path.basename(pkl):<25}"
          f" EPC0={epc_0:7.4f}  "
          f"EPC-del={epc_K:7.4f}  "
          f"Δ={epc_K-epc_0:+.4f}  "
          f"K={K}")

print("done.")

sanity:   3%|▎         | 5/180 [00:01<00:37,  4.64it/s]

BA_sz100_sp2_rp0.1_test100_0.pkl EPC0=42.3950  EPC-del= 9.8190  Δ=-32.5760  K=10
BA_sz100_sp2_rp0.1_test100_1.pkl EPC0=46.6100  EPC-del= 9.4995  Δ=-37.1105  K=10
BA_sz100_sp2_rp0.1_test100_2.pkl EPC0=45.5850  EPC-del= 9.3330  Δ=-36.2520  K=10
BA_sz100_sp2_rp0.2_test100_0.pkl EPC0=238.3150  EPC-del=24.9120  Δ=-213.4030  K=10
BA_sz100_sp2_rp0.2_test100_1.pkl EPC0=224.1650  EPC-del=25.5510  Δ=-198.6140  K=10
BA_sz100_sp2_rp0.2_test100_2.pkl EPC0=254.5900  EPC-del=21.3345  Δ=-233.2555  K=10
BA_sz100_sp2_rp0.3_test100_0.pkl EPC0=901.0350  EPC-del=48.6540  Δ=-852.3810  K=10


sanity:   7%|▋         | 13/180 [00:01<00:13, 12.75it/s]

BA_sz100_sp2_rp0.3_test100_1.pkl EPC0=882.4300  EPC-del=49.8555  Δ=-832.5745  K=10
BA_sz100_sp2_rp0.3_test100_2.pkl EPC0=888.5700  EPC-del=41.4630  Δ=-847.1070  K=10
BA_sz100_sp2_rp0.4_test100_0.pkl EPC0=1990.2350  EPC-del=79.0830  Δ=-1911.1520  K=10
BA_sz100_sp2_rp0.4_test100_1.pkl EPC0=1999.4700  EPC-del=90.2115  Δ=-1909.2585  K=10
BA_sz100_sp2_rp0.4_test100_2.pkl EPC0=1977.6900  EPC-del=95.9355  Δ=-1881.7545  K=10
BA_sz100_sp2_rp0.5_test100_0.pkl EPC0=2984.7950  EPC-del=107.8785  Δ=-2876.9165  K=10
BA_sz100_sp2_rp0.5_test100_1.pkl EPC0=3031.0350  EPC-del=119.0700  Δ=-2911.9650  K=10


sanity:  11%|█         | 19/180 [00:01<00:08, 18.03it/s]

BA_sz100_sp2_rp0.5_test100_2.pkl EPC0=3027.6050  EPC-del=115.8750  Δ=-2911.7300  K=10
BA_sz100_sp2_rp0.6_test100_0.pkl EPC0=3825.3850  EPC-del=236.7900  Δ=-3588.5950  K=10
BA_sz100_sp2_rp0.6_test100_1.pkl EPC0=3831.2000  EPC-del=248.8365  Δ=-3582.3635  K=10
BA_sz100_sp2_rp0.6_test100_2.pkl EPC0=3793.5050  EPC-del=182.7855  Δ=-3610.7195  K=10
BA_sz100_sp2_rp0.7_test100_0.pkl EPC0=4388.2750  EPC-del=411.8130  Δ=-3976.4620  K=10
BA_sz100_sp2_rp0.7_test100_1.pkl EPC0=4362.2300  EPC-del=703.5615  Δ=-3658.6685  K=10
BA_sz100_sp2_rp0.7_test100_2.pkl EPC0=4369.7700  EPC-del=300.5325  Δ=-4069.2375  K=10
BA_sz100_sp2_rp0.8_test100_0.pkl EPC0=4718.8250  EPC-del=688.8510  Δ=-4029.9740  K=10


sanity:  20%|██        | 36/180 [00:02<00:03, 43.09it/s]

BA_sz100_sp2_rp0.8_test100_1.pkl EPC0=4712.4100  EPC-del=1039.2030  Δ=-3673.2070  K=10
BA_sz100_sp2_rp0.8_test100_2.pkl EPC0=4712.4550  EPC-del=895.1130  Δ=-3817.3420  K=10
BA_sz100_sp2_rp0.9_test100_0.pkl EPC0=4895.1000  EPC-del=1909.3410  Δ=-2985.7590  K=10
BA_sz100_sp2_rp0.9_test100_1.pkl EPC0=4905.0800  EPC-del=2196.6345  Δ=-2708.4455  K=10
BA_sz100_sp2_rp0.9_test100_2.pkl EPC0=4893.6150  EPC-del=631.6875  Δ=-4261.9275  K=10
BA_sz100_sp2_rp1.0_test100_0.pkl EPC0=4950.0000  EPC-del=1639.1925  Δ=-3310.8075  K=10
BA_sz100_sp2_rp1.0_test100_1.pkl EPC0=4950.0000  EPC-del=1477.0260  Δ=-3472.9740  K=10
BA_sz100_sp2_rp1.0_test100_2.pkl EPC0=4950.0000  EPC-del=1111.8465  Δ=-3838.1535  K=10
BA_sz100_sp3_rp0.1_test100_0.pkl EPC0=93.5050  EPC-del=19.9800  Δ=-73.5250  K=10
BA_sz100_sp3_rp0.1_test100_1.pkl EPC0=87.6600  EPC-del=22.5000  Δ=-65.1600  K=10
BA_sz100_sp3_rp0.1_test100_2.pkl EPC0=93.7550  EPC-del=20.9970  Δ=-72.7580  K=10
BA_sz100_sp3_rp0.2_test100_0.pkl EPC0=912.9750  EPC-del=55.1430

sanity:  24%|██▍       | 43/180 [00:02<00:03, 45.52it/s]

BA_sz100_sp3_rp0.4_test100_1.pkl EPC0=3436.3600  EPC-del=276.7500  Δ=-3159.6100  K=10
BA_sz100_sp3_rp0.4_test100_2.pkl EPC0=3483.1100  EPC-del=437.1435  Δ=-3045.9665  K=10
BA_sz100_sp3_rp0.5_test100_0.pkl EPC0=4244.7550  EPC-del=1809.3825  Δ=-2435.3725  K=10
BA_sz100_sp3_rp0.5_test100_1.pkl EPC0=4164.4050  EPC-del=1017.9765  Δ=-3146.4285  K=10
BA_sz100_sp3_rp0.5_test100_2.pkl EPC0=4182.6600  EPC-del=1495.3140  Δ=-2687.3460  K=10
BA_sz100_sp3_rp0.6_test100_0.pkl EPC0=4599.6100  EPC-del=2138.8680  Δ=-2460.7420  K=10


sanity:  27%|██▋       | 49/180 [00:02<00:03, 36.87it/s]

BA_sz100_sp3_rp0.6_test100_1.pkl EPC0=4601.1000  EPC-del=2069.8335  Δ=-2531.2665  K=10
BA_sz100_sp3_rp0.6_test100_2.pkl EPC0=4605.6550  EPC-del=1839.3345  Δ=-2766.3205  K=10
BA_sz100_sp3_rp0.7_test100_0.pkl EPC0=4801.4650  EPC-del=2065.4055  Δ=-2736.0595  K=10
BA_sz100_sp3_rp0.7_test100_1.pkl EPC0=4831.4850  EPC-del=2518.7265  Δ=-2312.7585  K=10
BA_sz100_sp3_rp0.7_test100_2.pkl EPC0=4815.5900  EPC-del=2601.3690  Δ=-2214.2210  K=10
BA_sz100_sp3_rp0.8_test100_0.pkl EPC0=4918.0650  EPC-del=3186.8775  Δ=-1731.1875  K=10


sanity:  32%|███▏      | 58/180 [00:02<00:03, 31.02it/s]

BA_sz100_sp3_rp0.8_test100_1.pkl EPC0=4917.5550  EPC-del=3285.3195  Δ=-1632.2355  K=10
BA_sz100_sp3_rp0.8_test100_2.pkl EPC0=4909.4900  EPC-del=2905.2495  Δ=-2004.2405  K=10
BA_sz100_sp3_rp0.9_test100_0.pkl EPC0=4946.2300  EPC-del=3529.1835  Δ=-1417.0465  K=10
BA_sz100_sp3_rp0.9_test100_1.pkl EPC0=4944.4750  EPC-del=3412.9125  Δ=-1531.5625  K=10
BA_sz100_sp3_rp0.9_test100_2.pkl EPC0=4944.4350  EPC-del=2972.6505  Δ=-1971.7845  K=10
BA_sz100_sp3_rp1.0_test100_0.pkl EPC0=4950.0000  EPC-del=3472.0560  Δ=-1477.9440  K=10


sanity:  46%|████▌     | 82/180 [00:03<00:01, 62.88it/s]

BA_sz100_sp3_rp1.0_test100_1.pkl EPC0=4950.0000  EPC-del=3672.0000  Δ=-1278.0000  K=10
BA_sz100_sp3_rp1.0_test100_2.pkl EPC0=4950.0000  EPC-del=3398.7285  Δ=-1551.2715  K=10
ER_sz100_sp0.0443_rp0.1_test100_0.pkl EPC0=38.8200  EPC-del=21.5505  Δ=-17.2695  K=10
ER_sz100_sp0.0443_rp0.1_test100_1.pkl EPC0=33.1500  EPC-del=18.8145  Δ=-14.3355  K=10
ER_sz100_sp0.0443_rp0.1_test100_2.pkl EPC0=38.7950  EPC-del=22.3830  Δ=-16.4120  K=10
ER_sz100_sp0.0443_rp0.2_test100_0.pkl EPC0=210.3650  EPC-del=72.7425  Δ=-137.6225  K=10
ER_sz100_sp0.0443_rp0.2_test100_1.pkl EPC0=226.6450  EPC-del=62.1495  Δ=-164.4955  K=10
ER_sz100_sp0.0443_rp0.2_test100_2.pkl EPC0=219.5850  EPC-del=61.5645  Δ=-158.0205  K=10
ER_sz100_sp0.0443_rp0.3_test100_0.pkl EPC0=1085.2800  EPC-del=183.4020  Δ=-901.8780  K=10
ER_sz100_sp0.0443_rp0.3_test100_1.pkl EPC0=990.1500  EPC-del=174.0105  Δ=-816.1395  K=10
ER_sz100_sp0.0443_rp0.3_test100_2.pkl EPC0=891.7450  EPC-del=123.5925  Δ=-768.1525  K=10
ER_sz100_sp0.0443_rp0.4_test100_0.pk

sanity:  57%|█████▋    | 102/180 [00:03<00:00, 78.41it/s]

ER_sz100_sp0.0443_rp0.9_test100_0.pkl EPC0=4888.6500  EPC-del=3233.8440  Δ=-1654.8060  K=10
ER_sz100_sp0.0443_rp0.9_test100_1.pkl EPC0=4816.9450  EPC-del=3147.4035  Δ=-1669.5415  K=10
ER_sz100_sp0.0443_rp0.9_test100_2.pkl EPC0=4801.4900  EPC-del=3121.3350  Δ=-1680.1550  K=10
ER_sz100_sp0.0443_rp1.0_test100_0.pkl EPC0=4753.4850  EPC-del=3347.9595  Δ=-1405.5255  K=10
ER_sz100_sp0.0443_rp1.0_test100_1.pkl EPC0=4950.0000  EPC-del=3238.4430  Δ=-1711.5570  K=10
ER_sz100_sp0.0443_rp1.0_test100_2.pkl EPC0=4753.5200  EPC-del=3095.2170  Δ=-1658.3030  K=10
ER_sz100_sp0.0667_rp0.1_test100_0.pkl EPC0=91.6850  EPC-del=41.9940  Δ=-49.6910  K=10
ER_sz100_sp0.0667_rp0.1_test100_1.pkl EPC0=91.3300  EPC-del=42.9255  Δ=-48.4045  K=10
ER_sz100_sp0.0667_rp0.1_test100_2.pkl EPC0=86.2950  EPC-del=39.5865  Δ=-46.7085  K=10
ER_sz100_sp0.0667_rp0.2_test100_0.pkl EPC0=823.7150  EPC-del=214.1730  Δ=-609.5420  K=10
ER_sz100_sp0.0667_rp0.2_test100_1.pkl EPC0=901.7350  EPC-del=219.5370  Δ=-682.1980  K=10
ER_sz100_sp0

sanity:  62%|██████▏   | 111/180 [00:03<00:00, 70.13it/s]

ER_sz100_sp0.0667_rp0.5_test100_1.pkl EPC0=4495.5550  EPC-del=3040.2045  Δ=-1455.3505  K=10
ER_sz100_sp0.0667_rp0.5_test100_2.pkl EPC0=4599.9450  EPC-del=3224.0430  Δ=-1375.9020  K=10
ER_sz100_sp0.0667_rp0.6_test100_0.pkl EPC0=4704.2700  EPC-del=3072.9240  Δ=-1631.3460  K=10
ER_sz100_sp0.0667_rp0.6_test100_1.pkl EPC0=4697.0800  EPC-del=3279.8070  Δ=-1417.2730  K=10
ER_sz100_sp0.0667_rp0.6_test100_2.pkl EPC0=4802.6650  EPC-del=3462.2910  Δ=-1340.3740  K=10
ER_sz100_sp0.0667_rp0.7_test100_0.pkl EPC0=4857.8950  EPC-del=3495.1680  Δ=-1362.7270  K=10
ER_sz100_sp0.0667_rp0.7_test100_1.pkl EPC0=4881.9050  EPC-del=3565.9710  Δ=-1315.9340  K=10
ER_sz100_sp0.0667_rp0.7_test100_2.pkl EPC0=4823.9100  EPC-del=3470.0535  Δ=-1353.8565  K=10
ER_sz100_sp0.0667_rp0.8_test100_0.pkl EPC0=4897.3250  EPC-del=3467.9250  Δ=-1429.4000  K=10
ER_sz100_sp0.0667_rp0.8_test100_1.pkl EPC0=4939.4750  EPC-del=3702.4920  Δ=-1236.9830  K=10
ER_sz100_sp0.0667_rp0.8_test100_2.pkl EPC0=4911.9950  EPC-del=3560.8410  Δ=-1351

sanity:  76%|███████▌  | 137/180 [00:03<00:00, 96.18it/s]

ER_sz100_sp0.0667_rp0.9_test100_2.pkl EPC0=4928.7200  EPC-del=3259.8000  Δ=-1668.9200  K=10
ER_sz100_sp0.0667_rp1.0_test100_0.pkl EPC0=4950.0000  EPC-del=3921.1920  Δ=-1028.8080  K=10
ER_sz100_sp0.0667_rp1.0_test100_1.pkl EPC0=4950.0000  EPC-del=3917.2320  Δ=-1032.7680  K=10
ER_sz100_sp0.0667_rp1.0_test100_2.pkl EPC0=4846.1000  EPC-del=3483.2610  Δ=-1362.8390  K=10
SW_sz100_sp4_rp0.1_test100_0.pkl EPC0=31.0550  EPC-del=19.6875  Δ=-11.3675  K=10
SW_sz100_sp4_rp0.1_test100_1.pkl EPC0=30.5100  EPC-del=20.6730  Δ=-9.8370  K=10
SW_sz100_sp4_rp0.1_test100_2.pkl EPC0=30.1450  EPC-del=19.0170  Δ=-11.1280  K=10
SW_sz100_sp4_rp0.2_test100_0.pkl EPC0=117.1400  EPC-del=54.9540  Δ=-62.1860  K=10
SW_sz100_sp4_rp0.2_test100_1.pkl EPC0=122.6250  EPC-del=49.4370  Δ=-73.1880  K=10
SW_sz100_sp4_rp0.2_test100_2.pkl EPC0=112.1600  EPC-del=49.0905  Δ=-63.0695  K=10
SW_sz100_sp4_rp0.3_test100_0.pkl EPC0=448.2400  EPC-del=135.6525  Δ=-312.5875  K=10
SW_sz100_sp4_rp0.3_test100_1.pkl EPC0=495.7750  EPC-del=138.

sanity:  89%|████████▉ | 160/180 [00:03<00:00, 101.10it/s]

SW_sz100_sp4_rp0.8_test100_2.pkl EPC0=4852.0850  EPC-del=3000.2625  Δ=-1851.8225  K=10
SW_sz100_sp4_rp0.9_test100_0.pkl EPC0=4933.0500  EPC-del=3373.9110  Δ=-1559.1390  K=10
SW_sz100_sp4_rp0.9_test100_1.pkl EPC0=4928.8150  EPC-del=3047.5800  Δ=-1881.2350  K=10
SW_sz100_sp4_rp0.9_test100_2.pkl EPC0=4932.9750  EPC-del=3457.1430  Δ=-1475.8320  K=10
SW_sz100_sp4_rp1.0_test100_0.pkl EPC0=4950.0000  EPC-del=3503.0340  Δ=-1446.9660  K=10
SW_sz100_sp4_rp1.0_test100_1.pkl EPC0=4950.0000  EPC-del=3263.3865  Δ=-1686.6135  K=10
SW_sz100_sp4_rp1.0_test100_2.pkl EPC0=4950.0000  EPC-del=3485.7945  Δ=-1464.2055  K=10
SW_sz100_sp5_rp0.1_test100_0.pkl EPC0=31.7050  EPC-del=22.9725  Δ=-8.7325  K=10
SW_sz100_sp5_rp0.1_test100_1.pkl EPC0=31.2050  EPC-del=19.0260  Δ=-12.1790  K=10
SW_sz100_sp5_rp0.1_test100_2.pkl EPC0=31.1350  EPC-del=18.5760  Δ=-12.5590  K=10
SW_sz100_sp5_rp0.2_test100_0.pkl EPC0=117.8400  EPC-del=53.2035  Δ=-64.6365  K=10
SW_sz100_sp5_rp0.2_test100_1.pkl EPC0=117.6750  EPC-del=55.9710  Δ=

sanity: 100%|██████████| 180/180 [00:04<00:00, 44.57it/s] 

SW_sz100_sp5_rp0.5_test100_2.pkl EPC0=3489.9650  EPC-del=1133.3070  Δ=-2356.6580  K=10
SW_sz100_sp5_rp0.6_test100_0.pkl EPC0=4269.7450  EPC-del=2173.5540  Δ=-2096.1910  K=10
SW_sz100_sp5_rp0.6_test100_1.pkl EPC0=4176.3950  EPC-del=1998.9630  Δ=-2177.4320  K=10
SW_sz100_sp5_rp0.6_test100_2.pkl EPC0=4245.5700  EPC-del=2099.9610  Δ=-2145.6090  K=10
SW_sz100_sp5_rp0.7_test100_0.pkl EPC0=4663.4000  EPC-del=2514.7215  Δ=-2148.6785  K=10
SW_sz100_sp5_rp0.7_test100_1.pkl EPC0=4679.1550  EPC-del=2844.5805  Δ=-1834.5745  K=10
SW_sz100_sp5_rp0.7_test100_2.pkl EPC0=4637.8950  EPC-del=2650.3740  Δ=-1987.5210  K=10
SW_sz100_sp5_rp0.8_test100_0.pkl EPC0=4851.0300  EPC-del=3178.2105  Δ=-1672.8195  K=10
SW_sz100_sp5_rp0.8_test100_1.pkl EPC0=4854.3400  EPC-del=3019.7655  Δ=-1834.5745  K=10
SW_sz100_sp5_rp0.8_test100_2.pkl EPC0=4845.9150  EPC-del=3057.5160  Δ=-1788.3990  K=10
SW_sz100_sp5_rp0.9_test100_0.pkl EPC0=4933.8800  EPC-del=3436.1370  Δ=-1497.7430  K=10
SW_sz100_sp5_rp0.9_test100_1.pkl EPC0=4938.


