In [1]:
import pandas as pd
import networkx as nx
import torch
from torch_geometric.data import Data
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import time  # Import the time module

# Load nodes and edges from CSV files
nodes_df = pd.read_csv('ICS_OT NodesInfected.csv')
edges_df = pd.read_csv('ICS_OT EdgesInfected.csv')

# Display the entire DataFrame for nodes and edges
print("Nodes DataFrame:")
print(nodes_df.head())

print("Edges DataFrame:")
print(edges_df.head())

# Create a NetworkX graph
G = nx.Graph()

# Add nodes with dummy features (since only Id and Label are provided)
for _, row in nodes_df.iterrows():
    G.add_node(row['Id'], label=row['Label'], feature1=1.0, feature2=1.0)  # Example features

print(f"Total nodes in NetworkX graph: {G.number_of_nodes()}")

# Add edges
for _, row in edges_df.iterrows():
    G.add_edge(row['Source'], row['Target'])

print(f"Total edges in NetworkX graph: {G.number_of_edges()}")

# Convert NetworkX graph to PyTorch Geometric data format
x = torch.tensor([[1.0, 1.0] for _ in range(len(nodes_df))], dtype=torch.float32)  # Dummy features
edge_index = torch.tensor(edges_df[['Source', 'Target']].values.T, dtype=torch.long)

# Ensure all indices are within the valid range of node indices
valid_edges_mask = (edge_index[0] < len(nodes_df)) & (edge_index[1] < len(nodes_df))
edge_index = edge_index[:, valid_edges_mask]

# Create dummy labels for the nodes
labels = torch.tensor([0 for _ in range(len(nodes_df))], dtype=torch.long)  # Example labels

data = Data(x=x, edge_index=edge_index, y=labels)
print(data)

# Define the GCN model using spectral convolutions
class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)
        
    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

model = GCN(in_channels=2, hidden_channels=4, out_channels=2)  # Adjust parameters as needed

# Set up the optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

# Record the start time
start_time = time.time()

# Training the model
n_epochs = 100
for epoch in range(n_epochs):
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = criterion(out, data.y)
    loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

# Record the end time
end_time = time.time()

# Calculate the elapsed time
elapsed_time = end_time - start_time
print(f'Training completed in {elapsed_time:.2f} seconds')

Nodes DataFrame:
   Id                  Label
0   2               Switch 1
1   1  DMZ Internal Firewall
2   3           SCADA Server
3   4                    EWS
4   5                  HMI 1
Edges DataFrame:
   Source  Target        Type  Id  Label  Weight
0       1       2  Undirected   0    NaN       1
1       2       3  Undirected   1    NaN       1
2       2       4  Undirected   2    NaN       1
3       2       5  Undirected   3    NaN       1
4       2       6  Undirected   4    NaN       1
Total nodes in NetworkX graph: 31
Total edges in NetworkX graph: 34
Data(x=[31, 2], edge_index=[2, 33], y=[31])
Epoch 0, Loss: 0.6285448670387268
Epoch 10, Loss: 0.23488478362560272
Epoch 20, Loss: 0.09418542683124542
Epoch 30, Loss: 0.046969134360551834
Epoch 40, Loss: 0.028640905395150185
Epoch 50, Loss: 0.020080193877220154
Epoch 60, Loss: 0.015356597490608692
Epoch 70, Loss: 0.012378690764307976
Epoch 80, Loss: 0.010309707373380661
Epoch 90, Loss: 0.008773982524871826
Training completed in