#1. Assessing Attack Vulnerability in Networks with Uncertainty
My Thai. Paper implementation - Benchmark methods

## Sanity Checks

## Implementation

In [None]:
import networkx as nx
import numpy as np
import random
import time
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import linprog
from tqdm import tqdm

# ----- Core Sampling and Estimation -----
def sample_realization(G: nx.Graph) -> nx.Graph:
    """Draw one live-edge realization of the probabilistic graph ``G``.

    All nodes of ``G`` are kept.  Each edge is kept independently when a
    uniform draw from the global ``random`` module falls below the edge's
    ``'p'`` attribute.
    """
    realization = nx.Graph()
    realization.add_nodes_from(G.nodes())
    # One uniform draw per edge, in G.edges() order.
    live = [(a, b) for a, b in G.edges() if random.random() < G.edges[a, b]['p']]
    realization.add_edges_from(live)
    return realization

def pairwise_connectivity(H: nx.Graph) -> float:
    """Return the number of connected node pairs in ``H``.

    A component with ``s`` nodes contributes ``s * (s - 1) / 2`` pairs.
    """
    component_sizes = (len(component) for component in nx.connected_components(H))
    return sum(size * (size - 1) / 2 for size in component_sizes)

def estimate_epc(G: nx.Graph, N: int) -> float:
    """Estimate the expected pairwise connectivity (EPC) of ``G`` by sampling.

    Each of the ``N`` trials picks a source node uniformly at random and runs
    a breadth-first search in which an edge ``(v, w)`` is traversed only when a
    uniform draw falls below ``G.edges[v, w]['p']``.  The number of nodes
    reached, excluding the source, is accumulated over all trials.

    Args:
        G: Graph whose edges carry a ``'p'`` attribute.
        N: Number of trials; must be positive (``N == 0`` divides by zero).

    Returns:
        ``n * total / (2 * N)``, where ``n`` is the node count and ``total``
        the accumulated reach.  Returns ``0.0`` when ``G`` has no nodes.
    """
    nodes = list(G.nodes())  # built once instead of once per trial
    n = len(nodes)
    if n == 0:
        # random.choice raises on an empty sequence; no pair can be connected.
        return 0.0

    total = 0.0
    for _ in range(N):
        source = random.choice(nodes)
        visited = {source}
        frontier = [source]
        head = 0  # read cursor: FIFO order without the O(len) cost of list.pop(0)
        while head < len(frontier):
            v = frontier[head]
            head += 1
            for w in G.neighbors(v):
                if w not in visited and random.random() < G.edges[v, w]['p']:
                    visited.add(w)
                    frontier.append(w)
        total += len(visited) - 1
    return (n * total) / (2 * N)

# ----- Heuristic Removals -----
def remove_k_betweenness(G: nx.Graph, k: int) -> nx.Graph:
    """Return a copy of ``G`` without its ``k`` highest-betweenness nodes."""
    scores = nx.betweenness_centrality(G)
    ranked = sorted(scores, key=scores.get, reverse=True)
    pruned = G.copy()
    pruned.remove_nodes_from(ranked[:k])
    return pruned

def remove_k_pagerank_edges(G: nx.Graph, k: int) -> nx.Graph:
    """Return a copy of ``G`` without its ``k`` top-ranked edges.

    Edges are ranked by the PageRank of the corresponding node in the line
    graph of ``G``.  Unlike the node-removal heuristics, this one deletes
    edges and keeps every node.
    """
    edge_scores = nx.pagerank(nx.line_graph(G))
    ranked_edges = sorted(edge_scores, key=edge_scores.get, reverse=True)
    pruned = G.copy()
    pruned.remove_edges_from(ranked_edges[:k])
    return pruned

# ----- Sample-Average Approximation (SAA) -----
def sample_average_objective(G: nx.Graph, S: set, T: int) -> float:
    """Average pairwise connectivity after deleting ``S`` over ``T`` realizations.

    Each trial samples a fresh realization of ``G``, removes the nodes in
    ``S`` and measures the pairwise connectivity of what is left.
    """
    def one_trial() -> float:
        realization = sample_realization(G)
        realization.remove_nodes_from(S)
        return pairwise_connectivity(realization)

    return sum((one_trial() for _ in range(T)), 0.0) / T

def greedy_initial_SAA(G: nx.Graph, k: int, T: int) -> set:
    """Greedily build a deletion set of up to ``k`` nodes.

    In every round each remaining candidate ``u`` is scored with
    ``sample_average_objective(G, S | {u}, T)`` and the candidate with the
    smallest score joins ``S``.

    Args:
        G: Probabilistic graph (edges carry ``'p'``).
        k: Number of nodes to select.
        T: Realizations used for each objective estimate.

    Returns:
        The selected node set.  It has fewer than ``k`` elements only when
        ``G`` has fewer than ``k`` nodes.
    """
    S = set()
    candidates = set(G.nodes())
    for _ in range(k):
        if not candidates:
            # k exceeds the node count: stop instead of inserting None into S.
            break
        best_node, best_obj = None, float('inf')
        for u in candidates:
            obj = sample_average_objective(G, S | {u}, T)
            if obj < best_obj:
                best_node, best_obj = u, obj
        if best_node is None:
            # No candidate produced a finite, comparable score (e.g. NaN).
            break
        S.add(best_node)
        candidates.remove(best_node)
    return S

def SAA(G: nx.Graph, k: int, T: int) -> set:
    """Sample-average approximation: greedy start plus first-improvement swaps.

    Starting from ``greedy_initial_SAA``, the search repeatedly re-estimates
    the objective of the current set and takes the first single-node swap
    whose estimate is strictly smaller.  It stops once a full pass finds no
    improving swap.
    """
    solution = greedy_initial_SAA(G, k, T)

    def first_improving_swap(current: set, baseline: float):
        # Scan (leaving, entering) pairs in the same nested order every pass.
        for leaving in list(current):
            for entering in set(G.nodes()) - current:
                trial = (current - {leaving}) | {entering}
                if sample_average_objective(G, trial, T) < baseline:
                    return trial
        return None

    while True:
        # The baseline is re-sampled at the start of every pass.
        baseline = sample_average_objective(G, solution, T)
        better = first_improving_swap(solution, baseline)
        if better is None:
            return solution
        solution = better

# ----- LP Relaxation for REGA -----
def solve_lp_relaxation(G: nx.Graph, D: set, k: int) -> dict:
    """Solve the LP relaxation used by REGA and return the node variables.

    Variables: ``s_u`` in [0, 1] for every node and ``z_uv`` in [0, 1] for
    every edge.  Nodes in ``D`` are fixed to ``s_u = 1``.

    Constraints:
        * ``sum_u s_u == k``
        * for every edge ``(u, v)``: ``s_u + z_uv <= 1``, ``s_v + z_uv <= 1``
          and ``s_u + s_v + z_uv >= 1``

    Objective: minimize ``sum_{(u, v)} p_uv * z_uv`` with ``p_uv`` read from
    the edge attribute ``'p'``.

    Args:
        G: Graph whose edges carry a ``'p'`` attribute.
        D: Nodes already selected (forced to 1).
        k: Required total of the node variables.

    Returns:
        Mapping node -> value of ``s_u`` in the LP solution.

    Raises:
        RuntimeError: if ``linprog`` does not report success.
    """
    nodes = list(G.nodes())
    edges = list(G.edges())
    n, m = len(nodes), len(edges)

    # Column layout: [s_0 .. s_{n-1} | z_0 .. z_{m-1}]
    idx_s = {node: i for i, node in enumerate(nodes)}

    bounds = [(0, 1)] * (n + m)
    for u in D:
        bounds[idx_s[u]] = (1, 1)

    A_eq = np.zeros((1, n + m))
    A_eq[0, :n] = 1
    b_eq = [k]

    if m:
        # Three inequality rows per edge, written into one preallocated matrix.
        A_ub = np.zeros((3 * m, n + m))
        b_ub = np.empty(3 * m)
        for j, (u, v) in enumerate(edges):
            iu, iv, iz = idx_s[u], idx_s[v], n + j
            r = 3 * j
            A_ub[r, [iu, iz]] = 1           # s_u + z_uv <= 1
            b_ub[r] = 1
            A_ub[r + 1, [iv, iz]] = 1       # s_v + z_uv <= 1
            b_ub[r + 1] = 1
            A_ub[r + 2, [iu, iv, iz]] = -1  # -(s_u + s_v + z_uv) <= -1
            b_ub[r + 2] = -1
    else:
        # linprog rejects a 1-D empty A_ub; an edgeless graph has no inequalities.
        A_ub = b_ub = None

    c = np.zeros(n + m)
    for j, (u, v) in enumerate(edges):
        c[n + j] = G.edges[u, v]['p']

    res = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq,
                  bounds=bounds, method='highs')
    if not res.success:
        raise RuntimeError("LP infeasible: " + res.message)
    return {node: float(res.x[i]) for i, node in enumerate(nodes)}

def REGA_with_LP(G: nx.Graph, k: int, T_inner: int, R: int, alpha: float) -> set:
    """Randomized greedy construction guided by the LP relaxation, plus local search.

    For each of the ``R`` restarts:
      1. Build ``D`` one node at a time.  The LP relaxation is re-solved with
         ``D`` fixed, the remaining nodes are ranked by their LP value, and
         one node is drawn uniformly from the top ``alpha`` fraction (at
         least one node).
      2. Improve ``D`` with first-improvement single-node swaps, scoring each
         candidate with ``sample_average_objective(..., T_inner)``.
    The best set over all restarts is returned.

    Args:
        G: Probabilistic graph (edges carry ``'p'``).
        k: Number of nodes to delete.
        T_inner: Realizations per objective estimate.
        R: Number of restarts.
        alpha: Fraction of the ranked candidates eligible for the random pick.

    Returns:
        The best deletion set found, or ``None`` when ``R`` is not positive.
    """
    def sample_avg(S):
        # Reuse the shared estimator instead of a private copy of the same loop.
        return sample_average_objective(G, S, T_inner)

    best_S, best_obj = None, float('inf')
    nodes = set(G.nodes())

    for _restart in range(R):
        # --- randomized LP-guided construction ---
        D = set()
        for _pick in range(k):
            rem = list(nodes - D)
            if not rem:
                # k exceeds the node count: nothing left to choose from.
                break
            s_vals = solve_lp_relaxation(G, D, k)
            sorted_nodes = sorted(rem, key=lambda u: s_vals[u], reverse=True)
            shortlist = max(1, int(alpha * len(sorted_nodes)))
            D.add(random.choice(sorted_nodes[:shortlist]))

        # --- first-improvement local search ---
        current_obj = sample_avg(D)
        improved = True
        while improved:
            improved = False
            for u in list(D):
                for v in nodes - D:
                    newS = (D - {u}) | {v}
                    val = sample_avg(newS)
                    if val < current_obj:
                        D, current_obj, improved = newS, val, True
                        break
                if improved:
                    break

        if current_obj < best_obj:
            best_S, best_obj = D.copy(), current_obj
    return best_S

# ----- Experiment Runner including ALL algorithms -----
def run_experiments(models, ps, k,
                    T_saa, T_inner_rega, R_rega, alpha_rega, N_eval):
    """Benchmark Betweenness, PageRank, SAA and REGA-LP on every (model, p) pair.

    For each model graph and each uniform edge probability ``p``, every
    algorithm is timed with ``time.perf_counter`` (selection only) and the
    residual graph is scored with ``estimate_epc(..., N_eval)``.

    Returns:
        DataFrame with columns ``model``, ``p``, ``algo``, ``time``, ``epc``;
        one row per (model, p, algorithm).
    """
    def timed(fn, *args):
        # Return (result, elapsed seconds) for a single call.
        start = time.perf_counter()
        result = fn(*args)
        return result, time.perf_counter() - start

    def epc_after_removal(graph, removed):
        # EPC estimate of `graph` with the nodes in `removed` deleted.
        residual = graph.copy(); residual.remove_nodes_from(removed)
        return estimate_epc(residual, N_eval)

    rows = []
    for name, G0 in tqdm(models.items(), desc="Running experiments", total=len(models)):
        for p in tqdm(ps, desc=f"Model {name} with p", total=len(ps)):
            G = G0.copy()
            for u, v in G.edges():
                G.edges[u, v]['p'] = p

            # Betweenness (node removal)
            G_bc, t_bc = timed(remove_k_betweenness, G, k)
            epc_bc = estimate_epc(G_bc, N_eval)

            # PageRank on the line graph (edge removal)
            G_pr, t_pr = timed(remove_k_pagerank_edges, G, k)
            epc_pr = estimate_epc(G_pr, N_eval)

            # SAA
            S_saa, t_saa = timed(SAA, G, k, T_saa)
            epc_saa = epc_after_removal(G, S_saa)

            # REGA-LP
            S_rega, t_rega = timed(REGA_with_LP, G, k, T_inner_rega, R_rega, alpha_rega)
            epc_rega = epc_after_removal(G, S_rega)

            outcomes = (
                ('Betweenness', t_bc, epc_bc),
                ('PageRank',    t_pr, epc_pr),
                ('SAA',         t_saa, epc_saa),
                ('REGA-LP',     t_rega, epc_rega),
            )
            rows.extend(
                {'model': name, 'p': p, 'algo': algo, 'time': t, 'epc': e}
                for algo, t, e in outcomes
            )
    return pd.DataFrame(rows)

In [None]:


# ----- Define models and parameters -----
models = {
    'ER': nx.gnm_random_graph(50, 100, seed=42),
    'BA': nx.barabasi_albert_graph(50, 2, seed=42),
    'SW': nx.watts_strogatz_graph(50, 4, 0.3, seed=42),
}

# Six evenly spaced probabilities 0.0, 0.2, ..., 1.0.  linspace pins both
# endpoints exactly; arange with a float step relies on rounding for its length.
ps = np.linspace(0.0, 1.0, 6)
k = 10
T_saa = 30
T_inner_rega = 1000    # inner-sample size for REGA
R_rega = 5            # restarts for REGA
alpha_rega = 0.2
N_eval = 10000         # final EPC sample count (use 1e5 for paper-quality)

# Execute experiments
df = run_experiments(models, ps, k, T_saa, T_inner_rega, R_rega, alpha_rega, N_eval)

# Plot EPC vs p and Time vs p: one figure per (model, metric), one line per algorithm
for name in models:
    for column, title_word, y_label in (("epc", "EPC", "EPC"),
                                        ("time", "Time", "Time (s)")):
        plt.figure()
        for algo in df.algo.unique():
            sub = df[(df.model == name) & (df.algo == algo)]
            plt.plot(sub.p, sub[column], label=algo)
        plt.title(f"{name} — {title_word} vs p")
        plt.xlabel("p"); plt.ylabel(y_label); plt.grid(True); plt.legend()
        plt.savefig(f"{name}_{column}_vs_p.png")
plt.show()


# 2. Empty-set Greedy Heuristics

## Sanity Check

In [None]:
# Run all sanity-check cases for the stochastic CNDP implementation
# This block re‑defines the minimal functions needed, so it is fully
# self‑contained (no need for an external stochastic_cndp.py file).

import importlib, subprocess, sys, math, itertools, random, time, json, os, collections

# install networkx & matplotlib if missing
try:
    import networkx as nx
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "networkx"])
    import networkx as nx

try:
    import numpy as np
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "numpy"])
    import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "matplotlib"])
    import matplotlib.pyplot as plt

# ---------------- Helper functions ----------------
def pairwise_cost_det(G_det):
    """Return the number of connected node pairs of a deterministic graph.

    A component with ``s`` nodes contributes ``s * (s - 1) // 2`` pairs.
    """
    return sum(
        len(component) * (len(component) - 1) // 2
        for component in nx.connected_components(G_det)
    )

def expected_pairwise_connectivity(G, S=None, num_samples=10000, rng=None):
    """Monte-Carlo estimate of the expected pairwise connectivity after deleting ``S``.

    Args:
        G: Graph whose edges carry a ``"prob"`` attribute.
        S: Iterable of nodes to delete; ``None`` (the default) deletes nothing.
        num_samples: Number of live-edge scenarios to sample.
        rng: ``random.Random`` instance; a fresh unseeded one is created if omitted.

    Returns:
        Mean of ``pairwise_cost_det`` over the sampled scenarios, or ``0.0``
        when no node remains.
    """
    if rng is None:
        rng = random.Random()
    # None replaces the shared mutable default; set() gives O(1) membership
    # tests for any iterable the caller passes.
    removed = set() if S is None else set(S)
    remaining = set(G.nodes()) - removed
    if not remaining:
        return 0.0

    # Edges touching a removed node never consumed a random draw, so filtering
    # them once up front leaves the random stream unchanged.
    candidate_edges = [
        (u, v, d["prob"])
        for u, v, d in G.edges(data=True)
        if u not in removed and v not in removed
    ]

    total = 0
    for _ in range(num_samples):
        H = nx.Graph()
        H.add_nodes_from(remaining)
        for u, v, prob in candidate_edges:
            if rng.random() < prob:
                H.add_edge(u, v)
        total += pairwise_cost_det(H)
    return total / num_samples

def greedy_cndp(G, K, num_samples=2000, seed=None):
    """Greedy attacker: repeatedly delete the node that minimizes the estimated σ.

    Args:
        G: Graph whose edges carry a ``"prob"`` attribute.
        K: Number of nodes to delete.
        num_samples: Scenarios per σ estimate.
        seed: Seed for the private ``random.Random`` used by all estimates.

    Returns:
        ``(S, sigmas)``: the selected set and the σ estimates, starting with
        the estimate for the empty set followed by one value per pick.  Fewer
        than ``K`` picks are made only when ``G`` runs out of nodes.
    """
    rng = random.Random(seed)
    S = set()
    sigmas = [expected_pairwise_connectivity(G, S, num_samples, rng)]
    for _ in range(K):
        candidates = set(G.nodes()) - S
        if not candidates:
            # K exceeds the node count: stop instead of adding None to S.
            break
        best_v, best_sigma = None, float("inf")
        for v in candidates:
            sigma_v = expected_pairwise_connectivity(G, S | {v}, num_samples, rng)
            if sigma_v < best_sigma:
                best_sigma, best_v = sigma_v, v
        S.add(best_v)
        sigmas.append(best_sigma)
    return S, sigmas

# ------------------ Check 1: Edge‑count distribution ------------------
print("CHECK 1: Edge‑count distribution in live‑edge samples")
# G_small = nx.complete_graph(5)
G_small = nx.path_graph(4)
edge_prob = 0.7   # constant probability on every edge
for u, v in G_small.edges():
    G_small[u][v]["prob"] = edge_prob

rng = random.Random(0)
edge_counts = []
for _ in range(20000):
    H = nx.Graph()
    H.add_nodes_from(G_small.nodes())
    for u, v, d in G_small.edges(data=True):
        if rng.random() < d["prob"]:
            H.add_edge(u, v)
    edge_counts.append(H.number_of_edges())

mean_edges = np.mean(edge_counts)
# Each edge is live with probability edge_prob, so E[#edges] = edge_prob * |E|.
# (The previous constant 0.3 did not match the 0.7 assigned above.)
expected_mean = edge_prob * G_small.number_of_edges()
print(f"  Sample mean edges = {mean_edges:.3f} (expected {expected_mean:.3f})")

# plot histogram (single plot as required)
plt.figure()
plt.hist(edge_counts, bins=range(0, G_small.number_of_edges() + 2), rwidth=0.9)
plt.xlabel("edges in one live‑edge sample")
plt.ylabel("frequency (20 000 samples)")
plt.title("Check 1: Edge‑count distribution")
plt.show()

# ------------------ Check 2: Exact vs Monte‑Carlo σ ------------------
print("\nCHECK 2: Exact σ vs Monte‑Carlo σ on tiny graph")
def sigma_exact(G, S=None):
    """Exact expected pairwise connectivity after deleting ``S``, by enumeration.

    Enumerates every on/off pattern of the edges that survive the deletion
    and weights each pattern's ``pairwise_cost_det`` by its probability.
    Edges incident to a deleted node cannot appear in the residual graph, so
    they are left out of the enumeration; this keeps the scenario
    probabilities summing to one.  The cost is exponential in the number of
    enumerated edges.
    """
    removed = set() if S is None else set(S)
    rem = set(G.nodes()) - removed
    edges = [
        (u, v, G[u][v]["prob"])
        for u, v in G.edges()
        if u not in removed and v not in removed
    ]
    total = 0.0
    for mask in range(1 << len(edges)):
        H = nx.Graph(); H.add_nodes_from(rem)
        p_scenario = 1.0
        for bit, (u, v, p) in enumerate(edges):
            if (mask >> bit) & 1:
                H.add_edge(u, v)
                p_scenario *= p
            else:
                p_scenario *= (1 - p)
        total += p_scenario * pairwise_cost_det(H)
    return total

exact_sigma = sigma_exact(G_small, set())
mc_sigma = expected_pairwise_connectivity(G_small, set(), num_samples=20000, rng=rng)
print(f"  Exact σ = {exact_sigma:.4f}")
print(f"  Monte‑Carlo σ (20 000 samples) = {mc_sigma:.4f}")
print(f"  Relative error = {(mc_sigma - exact_sigma) / exact_sigma * 100:.2f}%")

# ------------------ Check 3: Deterministic extremes ------------------
print("\nCHECK 3: Deterministic extremes (all prob = 0 or 1)")

# No edge can ever be live.
G_zero = nx.path_graph(6)
nx.set_edge_attributes(G_zero, 0.0, "prob")
sigma_zero = expected_pairwise_connectivity(G_zero, set(), 5000, rng)
print(f"  All probs = 0 ⇒ σ = {sigma_zero} (should be 0)")

# Every edge is always live, so each sample equals the deterministic graph.
G_one = nx.path_graph(6)
nx.set_edge_attributes(G_one, 1.0, "prob")
sigma_one_mc = expected_pairwise_connectivity(G_one, set(), 5000, rng)
sigma_one_exact = pairwise_cost_det(G_one)
print(f"  All probs = 1 ⇒ MC σ ≈ {sigma_one_mc:.1f}, exact deterministic σ = {sigma_one_exact}")

# ------------------ Check 4: Greedy monotonicity ------------------
print("\nCHECK 4: Greedy monotonicity (σ must not increase)")
G_test = nx.erdos_renyi_graph(40, 0.07, seed=1)
# Draw the edge probabilities from a seeded generator so the test graph, and
# with it the printed σ sequence, is the same on every run.
prob_rng = random.Random(1)
for u, v in G_test.edges():
    G_test[u][v]["prob"] = prob_rng.uniform(0.1, 0.9)
S_sel, sigma_seq = greedy_cndp(G_test, K=6, num_samples=1000, seed=1)
monotone = all(prev >= nxt for prev, nxt in zip(sigma_seq, sigma_seq[1:]))
print(f"  σ sequence: {['{:.0f}'.format(s) for s in sigma_seq]}")
print(f"  Monotone non‑increasing? {monotone}")

# ------------------ Check 5: Replicability with fixed seed ------------------
print("\nCHECK 5: Replicability (same seed ⇒ same result)")
# Two runs with identical arguments, including the seed, must agree.
S1, sig1 = greedy_cndp(G_test, K=4, num_samples=500, seed=123)
S2, sig2 = greedy_cndp(G_test, K=4, num_samples=500, seed=123)
print(f"  Run 1 selected nodes: {sorted(S1)}")
print(f"  Run 2 selected nodes: {sorted(S2)}")
print(f"  Identical selections? {sorted(S1)==sorted(S2)}")
print(f"  σ sequences identical? {np.allclose(sig1, sig2)}")




## Implementation

In [None]:
"""
stochastic_cndp.py
------------------
Heuristic solver for the Stochastic Critical-Node Detection Problem (CNDP)
with *edge* uncertainty, as defined in Stochastic_CNDP.pdf.

Implements:
  • Algorithm 1  – Monte-Carlo estimator of σ(S)
  • Algorithm 2  – greedy attacker using that estimator
Supports:
  • Erdős–Rényi G(n,p) graphs
  • Watts–Strogatz WS(n, k, β) graphs
"""

from __future__ import annotations
import random, itertools, math, time
import networkx as nx
import numpy as np
from tqdm import tqdm


# ------------------------------------------------------------
# 1.  GRAPH BUILDING UTILITIES
# ------------------------------------------------------------
def er_prob_graph(n: int, p_edge: float,
                  p_low: float=0.1, p_high: float=0.9,
                  seed: int|None=None) -> nx.Graph:
    """Erdős–Rényi G(n, p_edge) graph with a random ``"prob"`` on every edge.

    The topology comes from ``nx.erdos_renyi_graph`` and each edge receives an
    independent existence probability drawn from U(p_low, p_high).  Both the
    topology and the probabilities are driven by ``seed``.
    """
    prob_rng = random.Random(seed)
    graph = nx.erdos_renyi_graph(n, p_edge, seed=seed)
    edge_probs = {(a, b): prob_rng.uniform(p_low, p_high) for a, b in graph.edges()}
    nx.set_edge_attributes(graph, edge_probs, "prob")
    return graph


def ws_prob_graph(n: int, k: int, beta: float,
                  p_low: float=0.1, p_high: float=0.9,
                  seed: int|None=None) -> nx.Graph:
    """Watts–Strogatz graph with a random ``"prob"`` on every edge.

    The topology comes from ``nx.watts_strogatz_graph(n, k, beta)`` and each
    edge receives an independent existence probability drawn from
    U(p_low, p_high).  Both are driven by ``seed``.
    """
    prob_rng = random.Random(seed)
    graph = nx.watts_strogatz_graph(n, k, beta, seed=seed)
    edge_probs = {(a, b): prob_rng.uniform(p_low, p_high) for a, b in graph.edges()}
    nx.set_edge_attributes(graph, edge_probs, "prob")
    return graph


# ------------------------------------------------------------
# 2.  PAIRWISE CONNECTIVITY ON ONE *SCENARIO*
# ------------------------------------------------------------
def pairwise_cost_det(G_det: nx.Graph) -> int:
    """Pairwise connectivity of one deterministic scenario.

    Sums ``|C| * (|C| - 1) // 2`` over the connected components ``C`` of
    ``G_det``; this is the quantity averaged inside the Monte-Carlo loop.
    """
    return sum(
        len(component) * (len(component) - 1) // 2
        for component in nx.connected_components(G_det)
    )


# ------------------------------------------------------------
# 3.  ALGORITHM 1 –  MONTE-CARLO ESTIMATE  σ(S)
# ------------------------------------------------------------
def expected_pairwise_connectivity(
    G: nx.Graph,
    S: set[int] | set[str],
    num_samples: int = 10_000,
    rng: random.Random | None = None,
    show_bar: bool = True,
) -> float:
    """
    Monte-Carlo estimator of σ(S) (Algorithm 1).

    Samples ``num_samples`` live-edge scenarios of ``G`` with the nodes in
    ``S`` removed and averages ``pairwise_cost_det`` over them.

    Args:
        G: Graph whose edges carry a ``"prob"`` attribute.
        S: Nodes to delete (any iterable of nodes is accepted).
        num_samples: Number of scenarios to sample.
        rng: Random generator; a fresh unseeded one is created if omitted.
        show_bar: Wrap the sampling loop in a ``tqdm`` progress bar.

    Returns:
        The sample mean of the pairwise cost, or ``0.0`` when no node remains.
    """
    if rng is None:
        rng = random.Random()

    removed = set(S)
    remaining_nodes = set(G.nodes()) - removed
    if not remaining_nodes:
        return 0.0

    # Edges touching a removed node never consumed a random draw, so they are
    # filtered once here rather than in every scenario.
    candidate_edges = [
        (u, v, data["prob"])
        for u, v, data in G.edges(data=True)
        if u not in removed and v not in removed
    ]

    total_cost = 0
    iterator = range(num_samples)
    if show_bar:
        iterator = tqdm(iterator, desc="MC-samples", leave=False)

    for _ in iterator:
        # sample a *live-edge* scenario
        H = nx.Graph()
        H.add_nodes_from(remaining_nodes)
        for u, v, prob in candidate_edges:
            if rng.random() < prob:
                H.add_edge(u, v)

        total_cost += pairwise_cost_det(H)

    return total_cost / num_samples


# ------------------------------------------------------------
# 4.  ALGORITHM 2 –  GREEDY ATTACKER
# ------------------------------------------------------------
def greedy_cndp(
    G: nx.Graph,
    K: int,
    num_samples: int = 2_000,
    seed: int | None = None,
) -> tuple[set[int], list[float]]:
    """Greedy attacker (Algorithm 2).

    Starting from the empty set, each round evaluates σ(S ∪ {v}) for every
    remaining node ``v`` with the Monte-Carlo estimator and deletes the node
    with the smallest estimate.  Progress is printed after every pick.

    Args:
        G: Graph whose edges carry a ``"prob"`` attribute.
        K: Number of nodes to delete.
        num_samples: Scenarios per σ estimate.
        seed: Seed of the private random generator shared by all estimates.

    Returns:
        ``(selected_set, sigmas)`` where ``sigmas[0]`` is the estimate for the
        empty set and each later entry follows one pick.  Fewer than ``K``
        picks are made only when ``G`` runs out of nodes.
    """
    rng = random.Random(seed)
    S: set[int] = set()
    sigmas: list[float] = []

    current_sigma = expected_pairwise_connectivity(G, S, num_samples, rng)
    sigmas.append(current_sigma)

    for _ in range(K):
        candidates = set(G.nodes()) - S
        if not candidates:
            # K exceeds the node count; without this guard None would be added
            # to S and the ':>3' format below would raise on it.
            break

        best_node, best_sigma = None, float("inf")
        for v in candidates:
            sigma_v = expected_pairwise_connectivity(G, S | {v},
                                                     num_samples, rng, False)
            if sigma_v < best_sigma:
                best_sigma, best_node = sigma_v, v

        S.add(best_node)                # exploit 1-step look-ahead
        current_sigma = best_sigma
        sigmas.append(current_sigma)
        print(f"● Picked {best_node:>3};   σ = {current_sigma:.1f}")

    return S, sigmas


# ------------------------------------------------------------
# 5.  QUICK DRIVER FOR EXPERIMENTS
# ------------------------------------------------------------
if __name__ == "__main__":
    # Command-line parsing (argparse: --model, -n, --p, --k, --budget/-K,
    # --samples, --seed) is disabled; the settings below are hard-coded.
    model = "er"
    n = 30
    p = 0.2
    seed = 42
    budget = 10
    samples = 10000
    k = 4  # only used for WS model

    # For the WS model `p` is passed as the rewiring probability β.
    G = (er_prob_graph(n, p, seed=seed) if model == "er"
         else ws_prob_graph(n, k, p, seed=seed))

    start = time.perf_counter()
    S_star, sigmas = greedy_cndp(G, budget, num_samples=samples, seed=seed)
    elapsed = time.perf_counter() - start

    print("\n=====  RESULT  =====")
    print(f"Removed nodes: {sorted(S_star)}")
    print("σ after each pick:", [round(x, 1) for x in sigmas])
    print(f"Elapsed: {elapsed:.1f} s")


# 3. Prof. Ashwin algorithm

## Sanity Check

In [None]:
import random, itertools, math, networkx as nx
import pandas as pd

# ---------- Helper functions ----------

def threshold_graph(G_prob, tau=0.5):
    """Build the deterministic skeleton of ``G_prob``.

    The result has every node of ``G_prob`` and exactly those edges whose
    ``"prob"`` attribute is at least ``tau``.
    """
    skeleton = nx.Graph()
    skeleton.add_nodes_from(G_prob.nodes())
    skeleton.add_edges_from(
        (a, b) for a, b, attrs in G_prob.edges(data=True) if attrs["prob"] >= tau
    )
    return skeleton

def greedy_mis(G):
    """Greedy maximal independent set, scanning nodes by ascending degree.

    A node joins the set when none of its neighbours is already in it.
    """
    independent = set()
    for node in sorted(G.nodes(), key=G.degree):
        if independent.isdisjoint(G.neighbors(node)):
            independent.add(node)
    return independent

def pairwise_cost_det(G):
    """Deterministic connectivity cost: Σ s(s-1)/2 over component sizes s."""
    return sum(
        len(component) * (len(component) - 1) // 2
        for component in nx.connected_components(G)
    )

def arulselvan_cndp(G_det, k):
    """Return a deletion set of size ``k`` using MIS + greedy augmentation.

    The kept set starts as a greedy maximal independent set.  While it has
    fewer than ``n - k`` nodes, the node whose addition gives the smallest
    pairwise cost of the induced subgraph is added.  Everything not kept is
    deleted.

    If the independent set already has more than ``n - k`` nodes (for example
    when ``G_det`` has no edges), the augmentation loop is skipped and the
    deletion set is topped up to ``k`` with kept nodes of highest degree.
    Previously this case never terminated.

    Args:
        G_det: Deterministic graph.
        k: Number of nodes to delete.

    Returns:
        Set of ``min(k, n)`` nodes to delete (empty when ``k <= 0``).
    """
    n = G_det.number_of_nodes()
    target_keep = min(max(n - k, 0), n)

    keep = greedy_mis(G_det)
    # Greedy augmentation: grow the kept set until only k nodes are left out.
    while len(keep) < target_keep:
        best_phi, best_v = math.inf, None
        for v in set(G_det.nodes()) - keep:
            # Cost of the residual graph if v is kept as well.
            phi = pairwise_cost_det(G_det.subgraph(keep | {v}))
            if phi < best_phi:
                best_phi, best_v = phi, v
        keep.add(best_v)

    deletion_set = set(G_det.nodes()) - keep
    shortfall = min(k, n) - len(deletion_set)
    if shortfall > 0:
        # Independent set was larger than n - k: delete extra kept nodes,
        # highest degree first (stable w.r.t. node order for ties).
        kept_in_order = [v for v in G_det.nodes() if v in keep]
        kept_in_order.sort(key=G_det.degree, reverse=True)
        deletion_set.update(kept_in_order[:shortfall])
    return deletion_set

def expected_pairwise_cost(G_prob, removed_nodes=None, T=200, seed=0):
    """Monte-Carlo estimate of the expected pairwise connectivity after node removal.

    Samples ``T`` scenarios with a ``random.Random(seed)`` generator; in each
    one every surviving edge is kept with its ``"prob"`` and the pairwise cost
    of the resulting graph is accumulated.  Returns the mean, or ``0.0`` when
    no node remains.
    """
    removed_nodes = set() if removed_nodes is None else removed_nodes
    rng = random.Random(seed)

    survivors = [node for node in G_prob.nodes() if node not in removed_nodes]
    if not survivors:   # no nodes remain
        return 0.0

    # Edges touching a removed node never draw a random number, so dropping
    # them up front does not alter the random stream.
    candidate_edges = [
        (a, b, attrs["prob"])
        for a, b, attrs in G_prob.edges(data=True)
        if not (a in removed_nodes or b in removed_nodes)
    ]

    running_cost = 0.0
    for _ in range(T):
        scenario = nx.Graph()
        scenario.add_nodes_from(survivors)
        scenario.add_edges_from(
            (a, b) for a, b, prob in candidate_edges if rng.random() < prob
        )
        running_cost += pairwise_cost_det(scenario)
    return running_cost / T

# ---------- 1. Toy Example (6 nodes) -------------

# manual edge list: (u, v, existence probability)
toy_edges = [
    (1, 2, 0.80),
    (1, 3, 0.48),
    (2, 3, 0.70),
    (2, 4, 0.25),
    (3, 4, 0.60),
    (3, 5, 0.30),
    (4, 5, 0.65),
    (4, 6, 0.90),
    (5, 6, 0.35),
]
G_toy_prob = nx.Graph()
G_toy_prob.add_nodes_from(range(1, 7))
for u, v, p in toy_edges:
    G_toy_prob.add_edge(u, v, prob=p)

tau = 0.5
k_toy = 2
G_toy_det = threshold_graph(G_toy_prob, tau)
del_toy = arulselvan_cndp(G_toy_det, k_toy)

# Same seed before/after so both estimates use the same random stream.
epc_before_toy = expected_pairwise_cost(G_toy_prob, removed_nodes=set(), T=500, seed=42)
epc_after_toy  = expected_pairwise_cost(G_toy_prob, removed_nodes=del_toy, T=500, seed=42)

# ---------- 2. Medium Example (50 nodes, 100 edges) -------------

random.seed(1)
n, m, k_medium = 50, 100, 15
G_med_det_base = nx.gnm_random_graph(n, m, seed=2)  # deterministic backbone

# assign heterogeneous per-edge reliability ~ Uniform[0.1,1]
G_med_prob = nx.Graph()
G_med_prob.add_nodes_from(G_med_det_base.nodes())
for u, v in G_med_det_base.edges():
    G_med_prob.add_edge(u, v, prob=random.uniform(0.1, 1.0))

tau_med = 0.5
G_med_det = threshold_graph(G_med_prob, tau_med)
del_med = arulselvan_cndp(G_med_det, k_medium)

epc_before_med = expected_pairwise_cost(G_med_prob, set(), T=200, seed=24)
epc_after_med  = expected_pairwise_cost(G_med_prob, del_med, T=200, seed=24)

# ---------- Summaries -------------

summary_toy = {
    "Example": ["Toy (n=6)"],
    "Threshold τ": [tau],
    "k deletions": [k_toy],
    "Deletion set": [sorted(del_toy)],
    "EPC before": [round(epc_before_toy, 2)],
    "EPC after": [round(epc_after_toy, 2)]
}

summary_med = {
    # Label derived from n so it cannot drift from the graph actually built
    # (it used to say n=100 while the graph has 50 nodes).
    "Example": [f"Medium (n={n})"],
    "Threshold τ": [tau_med],
    "k deletions": [k_medium],
    "Deletion set": [sorted(del_med)[:5] + ["..."]],  # truncate for display
    "EPC before": [round(epc_before_med, 1)],
    "EPC after": [round(epc_after_med, 1)]
}

df = pd.concat([pd.DataFrame(summary_toy), pd.DataFrame(summary_med)], ignore_index=True)


In [None]:
import random, itertools, math, networkx as nx
import pandas as pd

# ---------- Helper functions ----------

def threshold_graph(G_prob, tau=0.5):
    """Build the deterministic skeleton of ``G_prob``.

    The result has every node of ``G_prob`` and exactly those edges whose
    ``"prob"`` attribute is at least ``tau``.
    """
    skeleton = nx.Graph()
    skeleton.add_nodes_from(G_prob.nodes())
    skeleton.add_edges_from(
        (a, b) for a, b, attrs in G_prob.edges(data=True) if attrs["prob"] >= tau
    )
    return skeleton

def greedy_mis(G):
    """Greedy maximal independent set, scanning nodes by ascending degree.

    A node joins the set when none of its neighbours is already in it.
    """
    independent = set()
    for node in sorted(G.nodes(), key=G.degree):
        if independent.isdisjoint(G.neighbors(node)):
            independent.add(node)
    return independent

def pairwise_cost_det(G):
    """Deterministic connectivity cost: Σ s(s-1)/2 over component sizes s."""
    return sum(
        len(component) * (len(component) - 1) // 2
        for component in nx.connected_components(G)
    )

def arulselvan_cndp(G_det, k):
    """Return a deletion set of size ``k`` using MIS + greedy augmentation.

    The kept set starts as a greedy maximal independent set.  While it has
    fewer than ``n - k`` nodes, the node whose addition gives the smallest
    pairwise cost of the induced subgraph is added.  Everything not kept is
    deleted.

    If the independent set already has more than ``n - k`` nodes (for example
    when ``G_det`` has no edges), the augmentation loop is skipped and the
    deletion set is topped up to ``k`` with kept nodes of highest degree.
    Previously this case never terminated.

    Args:
        G_det: Deterministic graph.
        k: Number of nodes to delete.

    Returns:
        Set of ``min(k, n)`` nodes to delete (empty when ``k <= 0``).
    """
    n = G_det.number_of_nodes()
    target_keep = min(max(n - k, 0), n)

    keep = greedy_mis(G_det)
    # Greedy augmentation: grow the kept set until only k nodes are left out.
    while len(keep) < target_keep:
        best_phi, best_v = math.inf, None
        for v in set(G_det.nodes()) - keep:
            # Cost of the residual graph if v is kept as well.
            phi = pairwise_cost_det(G_det.subgraph(keep | {v}))
            if phi < best_phi:
                best_phi, best_v = phi, v
        keep.add(best_v)

    deletion_set = set(G_det.nodes()) - keep
    shortfall = min(k, n) - len(deletion_set)
    if shortfall > 0:
        # Independent set was larger than n - k: delete extra kept nodes,
        # highest degree first (stable w.r.t. node order for ties).
        kept_in_order = [v for v in G_det.nodes() if v in keep]
        kept_in_order.sort(key=G_det.degree, reverse=True)
        deletion_set.update(kept_in_order[:shortfall])
    return deletion_set

def expected_pairwise_cost(G_prob, removed_nodes=None, T=200, seed=0):
    """Monte-Carlo estimate of the expected pairwise connectivity after node removal.

    Samples ``T`` scenarios with a ``random.Random(seed)`` generator; in each
    one every surviving edge is kept with its ``"prob"`` and the pairwise cost
    of the resulting graph is accumulated.  Returns the mean, or ``0.0`` when
    no node remains.
    """
    removed_nodes = set() if removed_nodes is None else removed_nodes
    rng = random.Random(seed)

    survivors = [node for node in G_prob.nodes() if node not in removed_nodes]
    if not survivors:   # no nodes remain
        return 0.0

    # Edges touching a removed node never draw a random number, so dropping
    # them up front does not alter the random stream.
    candidate_edges = [
        (a, b, attrs["prob"])
        for a, b, attrs in G_prob.edges(data=True)
        if not (a in removed_nodes or b in removed_nodes)
    ]

    running_cost = 0.0
    for _ in range(T):
        scenario = nx.Graph()
        scenario.add_nodes_from(survivors)
        scenario.add_edges_from(
            (a, b) for a, b, prob in candidate_edges if rng.random() < prob
        )
        running_cost += pairwise_cost_det(scenario)
    return running_cost / T

# ---------- 1. Toy Example (6 nodes) -------------

# manual edge list: (u, v, existence probability)
toy_edges = [
    (1, 2, 0.80),
    (1, 3, 0.48),
    (2, 3, 0.70),
    (2, 4, 0.25),
    (3, 4, 0.60),
    (3, 5, 0.30),
    (4, 5, 0.65),
    (4, 6, 0.90),
    (5, 6, 0.35),
]
G_toy_prob = nx.Graph()
G_toy_prob.add_nodes_from(range(1, 7))
for u, v, p in toy_edges:
    G_toy_prob.add_edge(u, v, prob=p)

tau = 0.5
k_toy = 2
G_toy_det = threshold_graph(G_toy_prob, tau)
del_toy = arulselvan_cndp(G_toy_det, k_toy)

# Same seed before/after so both estimates use the same random stream.
epc_before_toy = expected_pairwise_cost(G_toy_prob, removed_nodes=set(), T=500, seed=42)
epc_after_toy  = expected_pairwise_cost(G_toy_prob, removed_nodes=del_toy, T=500, seed=42)

# ---------- 2. Medium Example (50 nodes, 100 edges) -------------

random.seed(1)
n, m, k_medium = 50, 100, 15
G_med_det_base = nx.gnm_random_graph(n, m, seed=2)  # deterministic backbone

# assign heterogeneous per-edge reliability ~ Uniform[0.1,1]
G_med_prob = nx.Graph()
G_med_prob.add_nodes_from(G_med_det_base.nodes())
for u, v in G_med_det_base.edges():
    G_med_prob.add_edge(u, v, prob=random.uniform(0.1, 1.0))

tau_med = 0.5
G_med_det = threshold_graph(G_med_prob, tau_med)
del_med = arulselvan_cndp(G_med_det, k_medium)

epc_before_med = expected_pairwise_cost(G_med_prob, set(), T=200, seed=24)
epc_after_med  = expected_pairwise_cost(G_med_prob, del_med, T=200, seed=24)

# ---------- Summaries -------------

summary_toy = {
    "Example": ["Toy (n=6)"],
    "Threshold τ": [tau],
    "k deletions": [k_toy],
    "Deletion set": [sorted(del_toy)],
    "EPC before": [round(epc_before_toy, 2)],
    "EPC after": [round(epc_after_toy, 2)]
}

summary_med = {
    # Label derived from n so it cannot drift from the graph actually built
    # (it used to say n=100 while the graph has 50 nodes).
    "Example": [f"Medium (n={n})"],
    "Threshold τ": [tau_med],
    "k deletions": [k_medium],
    "Deletion set": [sorted(del_med)[:5] + ["..."]],  # truncate for display
    "EPC before": [round(epc_before_med, 1)],
    "EPC after": [round(epc_after_med, 1)]
}

df = pd.concat([pd.DataFrame(summary_toy), pd.DataFrame(summary_med)], ignore_index=True)


## Implementation

# 4. Centrality-based heuristics

## Sanity Check

## Implementation

# 5. Analysis: Grid-fixed probability scenario (My Thai.) vs Draws from uniform[0.0, 1.0]

In [None]:
# 1. Tiny 4-node path graph 0-1-2-3 (3 edges), every p_uv = 0.7
import matplotlib.pyplot as plt

G = nx.Graph()
G.add_nodes_from([0, 1, 2, 3])  # include node 3 as it's used in an edge
G.add_edge(0, 1, prob=0.7)
G.add_edge(1, 2, prob=0.7)
G.add_edge(2, 3, prob=0.7)

# Alternative scenario (disabled): draw each edge probability from U[0, 1]
# instead of using the fixed grid value.
# rng = random.Random(seed)
# G = nx.path_graph(4)  # 0-1-2-3   (or nx.complete_graph(4))
# for u, v in G.edges():
#     G[u][v]["prob"] = rng.uniform(0.0, 1.0)

print(G.nodes())
print(G.edges(data=True))

# Seeded generator for the sampling below; it is also reused by the
# exact-vs-Monte-Carlo comparison that follows.
rng = random.Random(0)
edge_counts = []
for _ in range(20_000):
    H = nx.Graph()
    H.add_nodes_from(G.nodes())
    for u, v, d in G.edges(data=True):
        if rng.random() < d["prob"]:
            H.add_edge(u, v)
    edge_counts.append(H.number_of_edges())

# With a constant probability the analytic mean is 0.7 * 3 edges = 2.1.
print("Sample mean edges:", np.mean(edge_counts))

# One bin per possible edge count 0..|E|.
plt.hist(edge_counts, bins=range(G.number_of_edges() + 2), rwidth=0.9)
plt.xlabel("edges in one sample"); plt.ylabel("frequency"); plt.title("Live-edge distribution")
plt.show()

In [None]:
# ground-truth by enumeration (2^|E| scenarios; the 3-edge path gives 8)
def sigma_exact(G, S):
    """Exact expected pairwise connectivity after deleting ``S``, by enumeration.

    Enumerates every on/off pattern of the edges that survive the deletion
    and weights each pattern's ``pairwise_cost_det`` by its probability.
    Edges incident to a deleted node cannot appear in the residual graph, so
    they are left out of the enumeration; this keeps the scenario
    probabilities summing to one.
    """
    removed = set(S)
    rem = set(G.nodes()) - removed
    E = [
        (u, v, G[u][v]["prob"])
        for u, v in G.edges()
        if u not in removed and v not in removed
    ]
    total = 0
    for mask in range(1 << len(E)):
        H = nx.Graph(); H.add_nodes_from(rem)
        p = 1.0
        for bit, (u, v, q) in enumerate(E):
            if (mask >> bit) & 1:
                H.add_edge(u, v)
                p *= q
            else:
                p *= (1-q)
        total += p * pairwise_cost_det(H)
    return total

exact = sigma_exact(G, S=set())
mc    = expected_pairwise_connectivity(G, S=set(), num_samples=100000, rng=rng)
print("Exact σ:", exact, "   Monte-Carlo σ:", mc)


# 6. Comparison of all benchmark methods

In [None]:
def run_experiments_with_threshold(models, ps, k,
                                   T_saa, T_rega, R_rega, alpha,
                                   tau, N_eval):
    """Benchmark five deletion heuristics on every (model, p) pair.

    Algorithms: Betweenness, PageRank (edge removal), SAA, REGA and the
    threshold + MIS heuristic.  Each selection step is timed with
    ``time.perf_counter`` and the residual graph is scored with
    ``estimate_epc(..., N_eval)``.

    Args:
        models: Mapping name -> base graph.
        ps: Uniform edge probabilities to test.
        k: Deletion budget.
        T_saa: Realizations per objective estimate in SAA.
        T_rega, R_rega, alpha: Inner sample size, restarts and shortlist
            fraction for REGA.
        tau: Probability threshold for the deterministic skeleton.
        N_eval: Trials for the final EPC estimate.

    Returns:
        DataFrame with columns ``model``, ``p``, ``algo``, ``time``, ``epc``.
    """
    records = []
    for name, G0 in tqdm(models.items(), desc="Running experiments", total=len(models)):
        for p in tqdm(ps, desc=f"Model {name} with p", total=len(ps)):
            # 1. build probabilistic graph
            G = G0.copy()
            edge_probs = {e: p for e in G.edges()}
            nx.set_edge_attributes(G, edge_probs, 'p')
            # threshold_graph reads the "prob" attribute while the samplers
            # read 'p'; store both so step 6 does not raise KeyError.
            nx.set_edge_attributes(G, edge_probs, 'prob')

            # 2. Betweenness
            t0 = time.perf_counter()
            H_bc = remove_k_betweenness(G, k)
            t_bc = time.perf_counter() - t0
            epc_bc = estimate_epc(H_bc, N_eval)

            # 3. PageRank
            t0 = time.perf_counter()
            H_pr = remove_k_pagerank_edges(G, k)
            t_pr = time.perf_counter() - t0
            epc_pr = estimate_epc(H_pr, N_eval)

            # 4. SAA
            t0 = time.perf_counter()
            S_saa = SAA(G, k, T_saa)
            t_saa = time.perf_counter() - t0
            H_saa = G.copy(); H_saa.remove_nodes_from(S_saa)
            epc_saa = estimate_epc(H_saa, N_eval)

            # 5. REGA
            t0 = time.perf_counter()
            S_rega = REGA_with_LP(G, k, T_rega, R_rega, alpha)
            t_rega = time.perf_counter() - t0
            H_rega = G.copy(); H_rega.remove_nodes_from(S_rega)
            epc_rega = estimate_epc(H_rega, N_eval)

            # 6. Threshold + MIS (Arulselvan et al.)
            # NOTE(review): for p < tau the skeleton has no edges; confirm that
            # arulselvan_cndp terminates and returns k nodes in that case.
            t0 = time.perf_counter()
            G_det = threshold_graph(G, tau)
            del_th = arulselvan_cndp(G_det, k)
            t_th = time.perf_counter() - t0
            H_th = G.copy(); H_th.remove_nodes_from(del_th)
            epc_th = estimate_epc(H_th, N_eval)

            # 7. collect records for all 5
            for algo, t, epc in [
                ('Betweenness', t_bc, epc_bc),
                ('PageRank',    t_pr, epc_pr),
                ('SAA',         t_saa, epc_saa),
                ('REGA',        t_rega, epc_rega),
                ('Thresh-MIS',  t_th,  epc_th),
            ]:
                records.append({
                    'model': name,
                    'p':      p,
                    'algo':   algo,
                    'time':   t,
                    'epc':    epc
                })
    return pd.DataFrame(records)

In [None]:
# Three 20-node topologies, each generated with seed 42.
models = {
    'ER': nx.gnm_random_graph(20, 40, seed=42),
    'BA': nx.barabasi_albert_graph(20, 2, seed=42),
    'SW': nx.watts_strogatz_graph(20, 4, 0.3, seed=42),
}

# Settings forwarded as keyword arguments to the five-algorithm benchmark.
experiment_config = dict(
    ps=[0.1,0.2,0.5,0.7 ,1.0],
    k=10,
    T_saa=30,
    T_rega=1000,
    R_rega=5,
    alpha=0.2,
    tau=0.5,
    N_eval=10000,
)

df = run_experiments_with_threshold(models, **experiment_config)

In [None]:
# For every model: one EPC figure, then one timing figure, one line per algorithm.
for name in models:
    for column, title_word, y_label in (("epc", "EPC", "EPC"),
                                        ("time", "Time", "Time (s)")):
        plt.figure()
        for algo in df.algo.unique():
            sub = df[(df.model == name) & (df.algo == algo)]
            plt.plot(sub.p, sub[column], label=algo)
        plt.title(f"{name} — {title_word} vs p")
        plt.xlabel("p"); plt.ylabel(y_label); plt.grid(True); plt.legend()
        plt.savefig(f"{name}_{column}_vs_p.png")
plt.show()