Moving to Neighborhood Sampler

In [22]:
!pip install faiss-gpu
!pip install torch
!pip install torch_geometric
!pip install faiss-cpu
!pip install tqdm
!pip install scapy
!pip install geoopt

# Install torch-scatter and torch-sparse compatible with your PyTorch version
import torch
torch_version = torch.__version__.split('+')[0]
cuda_version = torch.version.cuda.replace('.', '')
!pip install torch-scatter -f https://data.pyg.org/whl/torch-{torch_version}+cu{cuda_version}.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-{torch_version}+cu{cuda_version}.html

import scipy.sparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import kneighbors_graph
from tqdm import tqdm
from torch_geometric.data import Data
from torch_geometric.loader import NeighborSampler
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

# Mount Google Drive (if using Google Colab)
from google.colab import drive
drive.mount('/content/drive')

# Device Configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# Load the split data files (train, validation, test)
print("Loading datasets...")
train_file = '/content/drive/MyDrive/modbus/train_data.csv'
val_file = '/content/drive/MyDrive/modbus/val_data.csv'
test_file = '/content/drive/MyDrive/modbus/test_data.csv'

# Read each file
train_data = pd.read_csv(train_file, low_memory=False)
val_data = pd.read_csv(val_file, low_memory=False)
test_data = pd.read_csv(test_file, low_memory=False)

# Normalize each dataset independently
scaler = StandardScaler()
train_scaled = scaler.fit_transform(train_data)
val_scaled = scaler.transform(val_data)
test_scaled = scaler.transform(test_data)

# Convert scaled data into numpy arrays for processing
train_scaled = np.array(train_scaled, dtype=np.float32)
val_scaled = np.array(val_scaled, dtype=np.float32)
test_scaled = np.array(test_scaled, dtype=np.float32)

# Function to create a KNN graph using Scikit-learn's kneighbors_graph function
def create_knn_graph(data, k, batch_size=1000):
    """Creates a KNN graph using the kneighbors_graph function with batched data."""
    n_samples = data.shape[0]
    n_batches = (n_samples - 1) // batch_size + 1

    knn_graphs = []

    for i in tqdm(range(0, n_samples, batch_size), total=n_batches, desc="Creating KNN graph"):
        batch = data[i:i+batch_size]
        batch_knn = kneighbors_graph(batch, n_neighbors=k, mode='connectivity', include_self=False, n_jobs=-1)
        knn_graphs.append(batch_knn)

    return scipy.sparse.vstack(knn_graphs)

# Create KNN graphs for each dataset
k = 2  # Define k for neighbors

print("Creating KNN graph for train data...")
train_knn_graph = create_knn_graph(train_scaled, k)

print("Creating KNN graph for validation data...")
val_knn_graph = create_knn_graph(val_scaled, k)

print("Creating KNN graph for test data...")
test_knn_graph = create_knn_graph(test_scaled, k)

# Convert CSR format to edge_index format for PyTorch Geometric
def knn_to_edge_index(knn_graph):
    """Converts a KNN graph from CSR format to COO and then to edge_index for PyTorch Geometric."""
    knn_coo = knn_graph.tocoo()

    edge_index_np = np.array([knn_coo.row, knn_coo.col])
    edge_index = torch.from_numpy(edge_index_np).long()

    return edge_index

train_edge_index = knn_to_edge_index(train_knn_graph)
val_edge_index = knn_to_edge_index(val_knn_graph)
test_edge_index = knn_to_edge_index(test_knn_graph)

# Convert the scaled datasets to PyTorch tensors
train_features = torch.tensor(train_scaled, dtype=torch.float32).to(device)
val_features = torch.tensor(val_scaled, dtype=torch.float32).to(device)
test_features = torch.tensor(test_scaled, dtype=torch.float32).to(device)

# Create PyTorch Geometric Data objects for each dataset
train_data = Data(x=train_features, edge_index=train_edge_index)
val_data = Data(x=val_features, edge_index=val_edge_index)
test_data = Data(x=test_features, edge_index=test_edge_index)

# UHG GraphSAGE Layer Definition
class UHGGraphSAGELayer(nn.Module):
    def __init__(self, in_features, out_features):
        super(UHGGraphSAGELayer, self).__init__()
        self.weight_neigh = nn.Parameter(torch.Tensor(out_features, in_features))
        self.weight_self = nn.Parameter(torch.Tensor(out_features, in_features))
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.kaiming_uniform_(self.weight_neigh, a=np.sqrt(5))
        nn.init.kaiming_uniform_(self.weight_self, a=np.sqrt(5))

    def forward(self, x, edge_index):
        row, col = edge_index

        # Neighbor aggregation
        neigh_sum = torch.zeros_like(x)
        neigh_sum.index_add_(0, row, x[col])
        neigh_count = torch.zeros(x.size(0), device=x.device)
        neigh_count.index_add_(0, row, torch.ones_like(row, dtype=torch.float))
        neigh_count = torch.clamp(neigh_count.unsqueeze(1), min=1)
        neigh_features = neigh_sum / neigh_count

        # Apply linear transformations
        neigh_transformed = torch.matmul(neigh_features, self.weight_neigh.t())
        self_transformed = torch.matmul(x, self.weight_self.t())

        # Combine using UHG-inspired operation (simplified addition)
        combined = neigh_transformed + self_transformed

        return F.leaky_relu(combined, negative_slope=0.01)

# UHG GraphSAGE Model Definition
class UHGGraphSAGE(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers, dropout=0.4):
        super(UHGGraphSAGE, self).__init__()
        self.layers = nn.ModuleList()
        self.dropout = nn.Dropout(0.5)

        self.layers.append(UHGGraphSAGELayer(in_channels, hidden_channels))
        for _ in range(num_layers - 2):
            self.layers.append(UHGGraphSAGELayer(hidden_channels, hidden_channels))
        self.layers.append(UHGGraphSAGELayer(hidden_channels, out_channels))

    def forward(self, x, adjs=None, edge_index=None):
      if adjs is not None:  # Training with neighborhood sampling
        for i, (edge_index, _, size) in enumerate(adjs):
            x_target = x[:size[1]]  # Target nodes for the current layer
            x = self.layers[i](x, edge_index)
      else:  # Full graph evaluation (no sampling)
        for layer in self.layers:
            x = layer(x, edge_index)
      return x

# Initialize the model
in_channels = train_features.size(1)
hidden_channels = 32
out_channels = in_channels
num_layers = 2
model = UHGGraphSAGE(in_channels=in_channels, hidden_channels=hidden_channels,
                     out_channels=out_channels, num_layers=num_layers).to(device)

# Loss and optimizer
criterion = nn.MSELoss()  # Use MSE to reconstruct node features
optimizer = torch.optim.AdamW(model.parameters(), lr=0.0001, weight_decay=1e-3)

# Learning rate scheduler
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)

# Neighborhood sampler with 10 neighbors at each layer
train_loader = NeighborSampler(train_data.edge_index, sizes=[10, 10], batch_size=64, shuffle=True)

# Training with neighborhood sampling
def train_with_sampling(model, optimizer, loader, data, criterion, device, accumulation_steps=8):
    model.train()
    total_loss = 0
    optimizer.zero_grad()

    # Add an explicit loop counter
    for batch_idx, (batch_size, n_id, adjs) in enumerate(tqdm(loader, desc="Training")):
        adjs = [adj.to(device) for adj in adjs]
        batch_x = data.x[n_id].to(device)

        # Forward pass
        out = model(batch_x, adjs)

        # Compute loss for the target nodes only (first batch_size nodes in the batch)
        loss = criterion(out[:batch_size], batch_x[:batch_size]) / accumulation_steps
        loss.backward()

        # Accumulate gradients and update model weights every accumulation_steps
        if (batch_idx + 1) % accumulation_steps == 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()  # Update weights
            optimizer.zero_grad()  # Reset gradients for the next accumulation
            total_loss += loss.item() * accumulation_steps  # Accumulate loss

    return total_loss / len(loader.dataset)


@torch.no_grad()
def evaluate(model, data, criterion, device):
    model.eval()
    x = data.x.to(device)
    edge_index = data.edge_index.to(device)

    # Perform forward pass without sampling
    out = model(x, edge_index=edge_index)

    # Loss computation
    loss = criterion(out, x)
    return loss.item()

num_epochs = 400
for epoch in range(1, num_epochs + 1):
    # Training with sampling
    train_loss = train_with_sampling(model, optimizer, train_loader, train_data, criterion, device)

    # Evaluate on validation set
    val_loss = evaluate(model, val_data, criterion, device)

    print(f'Epoch {epoch}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

Looking in links: https://data.pyg.org/whl/torch-2.4.0+cu121.html
Looking in links: https://data.pyg.org/whl/torch-2.4.0+cu121.html
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Using device: cpu
Loading datasets...
Creating KNN graph for train data...


Creating KNN graph: 100%|██████████| 700/700 [00:13<00:00, 52.12it/s]


Creating KNN graph for validation data...


Creating KNN graph: 100%|██████████| 150/150 [00:03<00:00, 43.22it/s]


Creating KNN graph for test data...


Creating KNN graph: 100%|██████████| 150/150 [00:04<00:00, 37.39it/s]
Training: 100%|██████████| 10938/10938 [00:19<00:00, 557.59it/s]


Epoch 1, Train Loss: 0.0012, Val Loss: 0.4391


Training: 100%|██████████| 10938/10938 [00:19<00:00, 570.15it/s]


Epoch 2, Train Loss: 0.0006, Val Loss: 0.3580


Training: 100%|██████████| 10938/10938 [00:19<00:00, 549.28it/s]


Epoch 3, Train Loss: 0.0006, Val Loss: 0.3568


Training: 100%|██████████| 10938/10938 [00:18<00:00, 579.25it/s]


Epoch 4, Train Loss: 0.0006, Val Loss: 0.3548


Training: 100%|██████████| 10938/10938 [00:18<00:00, 582.37it/s]


Epoch 5, Train Loss: 0.0006, Val Loss: 0.3480


Training: 100%|██████████| 10938/10938 [00:18<00:00, 596.30it/s]


Epoch 6, Train Loss: 0.0006, Val Loss: 0.3415


Training: 100%|██████████| 10938/10938 [00:19<00:00, 574.40it/s]


Epoch 7, Train Loss: 0.0005, Val Loss: 0.3365


Training: 100%|██████████| 10938/10938 [00:18<00:00, 587.91it/s]


Epoch 8, Train Loss: 0.0005, Val Loss: 0.3383


Training: 100%|██████████| 10938/10938 [00:18<00:00, 582.27it/s]


Epoch 9, Train Loss: 0.0005, Val Loss: 0.3373


Training: 100%|██████████| 10938/10938 [00:18<00:00, 589.64it/s]


Epoch 10, Train Loss: 0.0004, Val Loss: 0.3390


Training: 100%|██████████| 10938/10938 [00:18<00:00, 587.01it/s]


Epoch 11, Train Loss: 0.0004, Val Loss: 0.3480


Training: 100%|██████████| 10938/10938 [00:18<00:00, 594.08it/s]


Epoch 12, Train Loss: 0.0004, Val Loss: 0.3524


Training: 100%|██████████| 10938/10938 [00:18<00:00, 587.58it/s]


Epoch 13, Train Loss: 0.0003, Val Loss: 0.3638


Training: 100%|██████████| 10938/10938 [00:18<00:00, 594.62it/s]


Epoch 14, Train Loss: 0.0003, Val Loss: 0.3691


Training: 100%|██████████| 10938/10938 [00:18<00:00, 589.60it/s]


Epoch 15, Train Loss: 0.0002, Val Loss: 0.3919


Training: 100%|██████████| 10938/10938 [00:18<00:00, 586.56it/s]


Epoch 16, Train Loss: 0.0002, Val Loss: 0.4122


Training: 100%|██████████| 10938/10938 [00:19<00:00, 573.49it/s]


Epoch 17, Train Loss: 0.0002, Val Loss: 0.4340


Training: 100%|██████████| 10938/10938 [00:18<00:00, 586.50it/s]


Epoch 18, Train Loss: 0.0001, Val Loss: 0.4598


Training: 100%|██████████| 10938/10938 [00:19<00:00, 565.70it/s]


Epoch 19, Train Loss: 0.0001, Val Loss: 0.4842


Training: 100%|██████████| 10938/10938 [00:19<00:00, 575.47it/s]


Epoch 20, Train Loss: 0.0001, Val Loss: 0.5092


Training: 100%|██████████| 10938/10938 [00:18<00:00, 603.27it/s]


Epoch 21, Train Loss: 0.0001, Val Loss: 0.5331


Training: 100%|██████████| 10938/10938 [00:18<00:00, 586.30it/s]


Epoch 22, Train Loss: 0.0001, Val Loss: 0.5343


Training: 100%|██████████| 10938/10938 [00:18<00:00, 594.23it/s]


Epoch 23, Train Loss: 0.0000, Val Loss: 0.5326


Training: 100%|██████████| 10938/10938 [00:18<00:00, 595.87it/s]


Epoch 24, Train Loss: 0.0000, Val Loss: 0.5203


Training: 100%|██████████| 10938/10938 [00:18<00:00, 604.06it/s]


Epoch 25, Train Loss: 0.0000, Val Loss: 0.5069


Training: 100%|██████████| 10938/10938 [00:18<00:00, 596.89it/s]


Epoch 26, Train Loss: 0.0000, Val Loss: 0.4992


Training: 100%|██████████| 10938/10938 [00:18<00:00, 599.90it/s]


Epoch 27, Train Loss: 0.0000, Val Loss: 0.4985


Training: 100%|██████████| 10938/10938 [00:18<00:00, 594.23it/s]


Epoch 28, Train Loss: 0.0000, Val Loss: 0.4945


Training: 100%|██████████| 10938/10938 [00:18<00:00, 594.58it/s]


Epoch 29, Train Loss: 0.0000, Val Loss: 0.4870


Training: 100%|██████████| 10938/10938 [00:18<00:00, 595.06it/s]


Epoch 30, Train Loss: 0.0000, Val Loss: 0.4838


Training: 100%|██████████| 10938/10938 [00:18<00:00, 599.53it/s]


Epoch 31, Train Loss: 0.0000, Val Loss: 0.4798


Training: 100%|██████████| 10938/10938 [00:18<00:00, 592.18it/s]


Epoch 32, Train Loss: 0.0000, Val Loss: 0.4757


Training: 100%|██████████| 10938/10938 [00:18<00:00, 589.67it/s]


Epoch 33, Train Loss: 0.0000, Val Loss: 0.4696


Training: 100%|██████████| 10938/10938 [00:18<00:00, 597.67it/s]


Epoch 34, Train Loss: 0.0000, Val Loss: 0.4613


Training: 100%|██████████| 10938/10938 [00:18<00:00, 592.71it/s]


Epoch 35, Train Loss: 0.0000, Val Loss: 0.4537


Training: 100%|██████████| 10938/10938 [00:18<00:00, 591.08it/s]


Epoch 36, Train Loss: 0.0000, Val Loss: 0.4197


Training: 100%|██████████| 10938/10938 [00:18<00:00, 593.68it/s]


Epoch 37, Train Loss: 0.0000, Val Loss: 0.4120


Training: 100%|██████████| 10938/10938 [00:18<00:00, 604.38it/s]


Epoch 38, Train Loss: 0.0000, Val Loss: 0.3939


Training: 100%|██████████| 10938/10938 [00:18<00:00, 600.70it/s]


Epoch 39, Train Loss: 0.0000, Val Loss: 0.3888


Training: 100%|██████████| 10938/10938 [00:18<00:00, 601.33it/s]


Epoch 40, Train Loss: 0.0000, Val Loss: 0.3816


Training: 100%|██████████| 10938/10938 [00:18<00:00, 588.11it/s]


Epoch 41, Train Loss: 0.0000, Val Loss: 0.3788


Training: 100%|██████████| 10938/10938 [00:18<00:00, 591.00it/s]


Epoch 42, Train Loss: 0.0000, Val Loss: 0.3738


Training: 100%|██████████| 10938/10938 [00:18<00:00, 604.20it/s]


Epoch 43, Train Loss: 0.0000, Val Loss: 0.3698


Training: 100%|██████████| 10938/10938 [00:18<00:00, 595.55it/s]


Epoch 44, Train Loss: 0.0000, Val Loss: 0.3662


Training: 100%|██████████| 10938/10938 [00:18<00:00, 596.19it/s]


Epoch 45, Train Loss: 0.0000, Val Loss: 0.3639


Training: 100%|██████████| 10938/10938 [00:18<00:00, 602.86it/s]


Epoch 46, Train Loss: 0.0000, Val Loss: 0.3543


Training: 100%|██████████| 10938/10938 [00:18<00:00, 602.96it/s]


Epoch 47, Train Loss: 0.0000, Val Loss: 0.3517


Training: 100%|██████████| 10938/10938 [00:18<00:00, 603.13it/s]


Epoch 48, Train Loss: 0.0000, Val Loss: 0.3500


Training: 100%|██████████| 10938/10938 [00:18<00:00, 601.82it/s]


Epoch 49, Train Loss: 0.0000, Val Loss: 0.3464


Training: 100%|██████████| 10938/10938 [00:18<00:00, 588.18it/s]


Epoch 50, Train Loss: 0.0000, Val Loss: 0.3452


Training: 100%|██████████| 10938/10938 [00:18<00:00, 598.17it/s]


Epoch 51, Train Loss: 0.0000, Val Loss: 0.3524


Training: 100%|██████████| 10938/10938 [00:18<00:00, 606.21it/s]


Epoch 52, Train Loss: 0.0000, Val Loss: 0.3510


Training: 100%|██████████| 10938/10938 [00:19<00:00, 564.92it/s]


Epoch 53, Train Loss: 0.0000, Val Loss: 0.3501


Training: 100%|██████████| 10938/10938 [00:19<00:00, 561.79it/s]


Epoch 54, Train Loss: 0.0000, Val Loss: 0.3400


Training: 100%|██████████| 10938/10938 [00:18<00:00, 599.05it/s]


Epoch 55, Train Loss: 0.0000, Val Loss: 0.3276


Training: 100%|██████████| 10938/10938 [00:17<00:00, 609.82it/s]


Epoch 56, Train Loss: 0.0000, Val Loss: 0.3349


Training: 100%|██████████| 10938/10938 [00:18<00:00, 597.44it/s]


Epoch 57, Train Loss: 0.0000, Val Loss: 0.3332


Training: 100%|██████████| 10938/10938 [00:20<00:00, 544.67it/s]


Epoch 58, Train Loss: 0.0000, Val Loss: 0.3316


Training: 100%|██████████| 10938/10938 [00:19<00:00, 566.12it/s]


Epoch 59, Train Loss: 0.0000, Val Loss: 0.3258


Training: 100%|██████████| 10938/10938 [00:19<00:00, 560.33it/s]


Epoch 60, Train Loss: 0.0000, Val Loss: 0.3196


Training: 100%|██████████| 10938/10938 [00:19<00:00, 571.54it/s]


Epoch 61, Train Loss: 0.0000, Val Loss: 0.3213


Training: 100%|██████████| 10938/10938 [00:18<00:00, 599.12it/s]


Epoch 62, Train Loss: 0.0000, Val Loss: 0.3194


Training: 100%|██████████| 10938/10938 [00:18<00:00, 590.95it/s]


Epoch 63, Train Loss: 0.0000, Val Loss: 0.3132


Training: 100%|██████████| 10938/10938 [00:18<00:00, 586.54it/s]


Epoch 64, Train Loss: 0.0000, Val Loss: 0.3126


Training: 100%|██████████| 10938/10938 [00:18<00:00, 604.99it/s]


Epoch 65, Train Loss: 0.0000, Val Loss: 0.3055


Training: 100%|██████████| 10938/10938 [00:17<00:00, 609.71it/s]


Epoch 66, Train Loss: 0.0000, Val Loss: 0.2932


Training: 100%|██████████| 10938/10938 [00:18<00:00, 605.14it/s]


Epoch 67, Train Loss: 0.0000, Val Loss: 0.2973


Training: 100%|██████████| 10938/10938 [00:18<00:00, 603.62it/s]


Epoch 68, Train Loss: 0.0000, Val Loss: 0.2841


Training: 100%|██████████| 10938/10938 [00:18<00:00, 596.07it/s]


Epoch 69, Train Loss: 0.0000, Val Loss: 0.2826


Training:  62%|██████▏   | 6751/10938 [00:11<00:07, 581.12it/s]


KeyboardInterrupt: 