In [67]:
import numpy as np
import torch
import torch.nn.functional as F

from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv

from sklearn.model_selection import train_test_split

In [69]:
# Load the pre-built dataset from disk, in this order: node features, leak
# labels, graph connectivity.
# NOTE(review): the output recorded for this cell reports shapes
# (51040, 45, 3), (51040,) and (2, 120); re-check after regenerating the files.
X, y, edge_index = (
    np.load(path)
    for path in (
        "X_rede-sub-a-alt.npy",
        "y_rede-sub-a-alt.npy",
        "edge_index_rede-sub-a-alt.npy",
    )
)

print(X.shape, y.shape, edge_index.shape)


(51040, 45, 3) (51040,) (2, 120)


In [70]:
# Node identifiers of the network and the subset treated as junctions, stored
# as pickled object arrays and converted to plain Python lists.
# NOTE(review): allow_pickle=True executes pickle on load -- only use it on
# files produced by a trusted pipeline.
node_array = np.load("node_list_rede-sub-a-alt.npy", allow_pickle=True)
junction_array = np.load("junction_list_rede-sub-a-alt.npy", allow_pickle=True)

node_list = node_array.tolist()
junction_list = junction_array.tolist()

print(len(node_list), len(junction_list))

45 44


In [71]:
# Standardise each feature channel (last axis) to zero mean / unit variance,
# using one scalar mean and std per channel taken over all samples and nodes.
# NOTE(review): the statistics come from the full array; if a train/test split
# is made afterwards, held-out samples contribute to them (mild leakage).
X_norm = X.copy()

for f in range(X_norm.shape[2]):
    channel = X_norm[:, :, f]
    mean = channel.mean()
    std = channel.std() + 1e-6  # epsilon guards against a constant channel
    X_norm[:, :, f] = (channel - mean) / std

# From here on `X` refers to the normalised copy.
X = X_norm
print("Features normalized")


Features normalized


In [72]:
# Convert the NumPy arrays to torch tensors: float features, integer labels
# and integer edge endpoints.
X, y, edge_index = (
    torch.tensor(X, dtype=torch.float),
    torch.tensor(y, dtype=torch.long),
    torch.tensor(edge_index, dtype=torch.long),
)

# X is laid out as (samples, nodes, features).
num_samples, num_nodes, num_features = X.shape
# Number of distinct label values that actually occur in y.
num_classes = torch.unique(y).numel()

print("Nodes:", num_nodes)
print("Leak classes (junctions):", num_classes)


Nodes: 45
Leak classes (junctions): 44


In [73]:
# Position of every junction inside node_list, in junction_list order.
# list.index raises ValueError if a junction is missing from node_list.
junction_indices = torch.tensor(
    list(map(node_list.index, junction_list)),
    dtype=torch.long,
)

num_junctions = junction_indices.numel()
print("Number of junctions:", num_junctions)

Number of junctions: 44


In [74]:
# One graph object per sample. Every graph reuses the same edge_index tensor;
# x is that sample's (num_nodes, num_features) slice and y its scalar label.
data_list = [
    Data(x=X[i], edge_index=edge_index, y=y[i])
    for i in range(num_samples)
]

print("Dataset ready:", len(data_list))


Dataset ready: 51040


In [75]:
# Hold out 20% of the graphs, stratified on the label so both parts keep the
# same class proportions; random_state fixes the partition.
train_data, test_data = train_test_split(
    data_list, test_size=0.2, random_state=42, stratify=y
)

# Only the training loader reshuffles between epochs.
loader_options = {"batch_size": 64}
train_loader = DataLoader(train_data, shuffle=True, **loader_options)
test_loader = DataLoader(test_data, shuffle=False, **loader_options)

print(len(train_data), len(test_data))


40832 10208


In [76]:
class LeakGCN(torch.nn.Module):
    """Two-layer GCN that scores every node and returns the junction scores per graph.

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Output width of both GCN layers.
        junction_indices: 1-D integer tensor (or sequence) of within-graph node
            positions; their scores become the output columns, in this order.
    """

    def __init__(self, in_channels, hidden_channels, junction_indices):
        super().__init__()
        # Non-persistent buffer: follows ``model.to(device)`` / ``.cuda()`` but
        # adds no key to ``state_dict``.
        self.register_buffer(
            "junction_indices",
            torch.as_tensor(junction_indices, dtype=torch.long),
            persistent=False,
        )

        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.lin   = torch.nn.Linear(hidden_channels, 1)

    def forward(self, x, edge_index, batch=None):
        """Return a ``(num_graphs, num_junctions)`` tensor of junction scores.

        Args:
            x: Node features of all graphs in the batch, stacked along dim 0.
            edge_index: Edge endpoints for the stacked graphs.
            batch: Graph id of every node; ``None`` means a single graph.

        Raises:
            IndexError: If a junction index does not exist in some graph.
        """
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))

        node_scores = self.lin(x).squeeze(-1)
        junctions = self.junction_indices.to(node_scores.device)

        if batch is None:
            batch = torch.zeros(
                node_scores.size(0), dtype=torch.long, device=node_scores.device
            )
        if batch.numel() == 0:
            return node_scores.new_zeros((0, junctions.numel()))

        num_graphs = int(batch.max()) + 1

        # Group nodes by graph id. The sort is stable, so the order of nodes
        # inside each graph is unchanged (no-op for already sorted batches).
        order = torch.sort(batch, stable=True).indices
        node_scores = node_scores[order]

        # offsets[g] is where graph g starts in the grouped score vector.
        counts = torch.bincount(batch, minlength=num_graphs)
        offsets = torch.cumsum(counts, dim=0) - counts

        # Resolve junction positions per graph; negative positions count from
        # the end of that graph, as ordinary indexing would.
        sizes = counts.unsqueeze(1)
        local = junctions.unsqueeze(0).expand(num_graphs, -1)
        local = torch.where(local < 0, local + sizes, local)
        if bool(((local < 0) | (local >= sizes)).any()):
            raise IndexError(
                "junction index out of range for at least one graph in the batch"
            )

        # Single gather instead of one masked selection per graph.
        return node_scores[offsets.unsqueeze(1) + local]


In [77]:
# Seed torch's global RNG so weight initialisation (and later draws from the
# global generator) repeat across kernel restarts. 42 matches the random_state
# used for the train/test split.
torch.manual_seed(42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The input width is taken from the data rather than hard-coded.
model = LeakGCN(
    in_channels=num_features,
    hidden_channels=64,
    junction_indices=junction_indices.to(device)
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Multi-class loss over the per-junction scores returned by the model.
criterion = torch.nn.CrossEntropyLoss()


In [78]:
def train():
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer``, ``criterion`` and
    ``device``.

    Returns:
        The mean of the per-batch loss values.
    """
    model.train()
    batch_losses = []

    for graphs in train_loader:
        graphs = graphs.to(device)

        optimizer.zero_grad()
        logits = model(graphs.x, graphs.edge_index, graphs.batch)
        batch_loss = criterion(logits, graphs.y)
        batch_loss.backward()

        # Cap the global gradient norm at 1.0 before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        batch_losses.append(batch_loss.item())

    return sum(batch_losses) / len(train_loader)


In [79]:
def test(loader):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            scores = model(batch.x, batch.edge_index, batch.batch)
            preds = scores.argmax(dim=1)

            correct += (preds == batch.y).sum().item()
            total += batch.y.size(0)

    return correct / total


In [80]:
def topk_accuracy(loader, k=3):
    """Return the fraction of samples whose label is among the k best-scored columns.

    Uses the notebook-level ``model`` and ``device``.

    Args:
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``
            and ``y``.
        k: Number of highest-scoring columns to accept. Values larger than the
            number of score columns are clamped to it.

    Returns:
        Hits divided by the number of samples seen.
    """
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            scores = model(batch.x, batch.edge_index, batch.batch)

            # topk raises when k exceeds the number of columns; clamp instead.
            k_eff = min(k, scores.size(1))
            topk = scores.topk(k_eff, dim=1).indices

            # One vectorised comparison (and one host sync) per batch rather
            # than an .item() call per sample.
            hits = (topk == batch.y.unsqueeze(1)).any(dim=1)
            correct += hits.sum().item()
            total += batch.y.size(0)

    return correct / total

In [81]:
# Train for a fixed number of epochs and report loss plus train/test accuracy
# after every epoch.
NUM_EPOCHS = 40

for epoch in range(1, NUM_EPOCHS + 1):
    loss = train()
    train_acc, test_acc = test(train_loader), test(test_loader)

    print(f"Epoch {epoch:02d} | Loss {loss:.3f} | Train {train_acc:.3f} | Test {test_acc:.3f}")

Epoch 01 | Loss 1.923 | Train 0.599 | Test 0.597
Epoch 02 | Loss 0.919 | Train 0.647 | Test 0.645
Epoch 03 | Loss 0.782 | Train 0.725 | Test 0.729
Epoch 04 | Loss 0.675 | Train 0.763 | Test 0.762
Epoch 05 | Loss 0.582 | Train 0.785 | Test 0.787
Epoch 06 | Loss 0.504 | Train 0.814 | Test 0.816
Epoch 07 | Loss 0.451 | Train 0.834 | Test 0.840
Epoch 08 | Loss 0.418 | Train 0.841 | Test 0.841
Epoch 09 | Loss 0.393 | Train 0.848 | Test 0.849
Epoch 10 | Loss 0.376 | Train 0.864 | Test 0.864
Epoch 11 | Loss 0.364 | Train 0.861 | Test 0.863
Epoch 12 | Loss 0.354 | Train 0.864 | Test 0.865
Epoch 13 | Loss 0.346 | Train 0.873 | Test 0.870
Epoch 14 | Loss 0.339 | Train 0.876 | Test 0.877
Epoch 15 | Loss 0.334 | Train 0.858 | Test 0.858
Epoch 16 | Loss 0.327 | Train 0.858 | Test 0.859
Epoch 17 | Loss 0.322 | Train 0.867 | Test 0.867
Epoch 18 | Loss 0.317 | Train 0.888 | Test 0.889
Epoch 19 | Loss 0.309 | Train 0.859 | Test 0.859
Epoch 20 | Loss 0.306 | Train 0.864 | Test 0.864
Epoch 21 | Loss 0.29