In [2]:
import pandas as pd
import networkx as nx
import torch
from torch_geometric.data import Data
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

# Read the node table and the edge table from their CSV exports.
nodes_df = pd.read_csv('ComplexNodes2.gml.csv')
edges_df = pd.read_csv('ComplexEdges2.gml.csv')

# Show the node table in full, followed by its row count.
print("Nodes DataFrame:")
print(nodes_df)
print(f"Total nodes loaded: {len(nodes_df)}")

# Show the edge table in full, followed by its row count.
print("Edges DataFrame:")
print(edges_df)
print(f"Total edges loaded: {len(edges_df)}")

# Undirected NetworkX graph mirroring the two tables.
G = nx.Graph()

# The node table only provides Id and Label, so every node also receives
# two constant placeholder features.
G.add_nodes_from(
    (node_id, {'label': node_label, 'feature1': 1.0, 'feature2': 1.0})
    for node_id, node_label in zip(nodes_df['Id'], nodes_df['Label'])
)

print(f"Total nodes in NetworkX graph: {G.number_of_nodes()}")

# One edge per row of the edge table (Source -> Target).
G.add_edges_from(zip(edges_df['Source'], edges_df['Target']))

print(f"Total edges in NetworkX graph: {G.number_of_edges()}")

# Convert the node/edge tables to PyTorch Geometric data format.
# Local import so this section carries its own dependency.
from torch_geometric.utils import to_undirected

num_nodes = len(nodes_df)

# Dummy features: every node gets the same constant 2-dimensional vector.
x = torch.ones((num_nodes, 2), dtype=torch.float32)

# Rows of `x` are addressed by position, so translate node Ids into row
# positions instead of assuming the Ids already are 0..N-1 in table order.
node_position = {node_id: pos for pos, node_id in enumerate(nodes_df['Id'])}
source_pos = edges_df['Source'].map(node_position)
target_pos = edges_df['Target'].map(node_position)
if source_pos.isna().any() or target_pos.isna().any():
    raise ValueError(
        "Edge list references node Ids that are missing from the nodes table"
    )

listed_edges = torch.tensor(
    [source_pos.astype('int64').tolist(), target_pos.astype('int64').tolist()],
    dtype=torch.long,
)

# The edge table lists each undirected edge once (Source -> Target), but
# message passing follows edge_index as directed edges. Add the reverse
# direction (and drop duplicates) so information flows both ways.
edge_index = to_undirected(listed_edges, num_nodes=num_nodes)

# Create dummy labels for the nodes (all class 0).
labels = torch.zeros(num_nodes, dtype=torch.long)

data = Data(x=x, edge_index=edge_index, y=labels)
print(data)

# Check the shape of the data
print(f"Shape of node features: {data.x.shape}")
print(f"Shape of edge index: {data.edge_index.shape}")
print(f"Shape of labels: {data.y.shape}")

# Print actual node features and edge indices
print("Node features (x):")
print(data.x)

print("Edge index (edge_index):")
print(data.edge_index)

print("Labels (y):")
print(data.y)

# Define the GCN model
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network.

    The first GCNConv layer maps ``in_channels`` to ``hidden_channels`` and
    is followed by a ReLU; the second maps ``hidden_channels`` to
    ``out_channels``. The forward pass returns log-softmax scores taken
    over dimension 1.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Run both convolutions on ``data.x`` using ``data.edge_index``."""
        edges = data.edge_index
        hidden = F.relu(self.conv1(data.x, edges))
        scores = self.conv2(hidden, edges)
        return F.log_softmax(scores, dim=1)

model = GCN(in_channels=2, hidden_channels=4, out_channels=2)  # Adjust parameters as needed

# Set up the optimizer and loss function.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# GCN.forward already ends in log_softmax, so the matching criterion is the
# negative log-likelihood loss. CrossEntropyLoss would apply log-softmax a
# second time to values that are already log-probabilities.
criterion = torch.nn.NLLLoss()

# Training the model: full-batch gradient descent over the single graph.
n_epochs = 100
model.train()  # training mode does not change between epochs; set it once
for epoch in range(n_epochs):
    optimizer.zero_grad()
    out = model(data)
    loss = criterion(out, data.y)
    loss.backward()
    optimizer.step()

    # Report the loss every tenth epoch.
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')


Nodes DataFrame:
    Id                 Label
0    0              Internet
1    1   Perimeter FortiGate
2    2     Core FortiSwitch1
3    3     Core FortiSwitch2
4    4          SCADA Server
5    5         FortiAnalyzer
6    6              FortiNAC
7    7             FortiSIEM
8    8                   HMI
9    9          FortiManager
10  10    Internal FortiGate
11  11  Access FortiSwitch 1
12  12             FortiAP 1
13  13                 PLC 1
14  14  Access FortiSwitch 2
15  15             FortiAP 2
16  16                 PLC 2
17  17  Access FortiSwitch 3
18  18             FortiAP 3
19  19                 PLC 3
20  20  Access FortiSwitch 4
21  21                 PLC 4
Total nodes loaded: 22
Edges DataFrame:
    Source  Target        Type  Id  Label
0        0       1  Undirected   0    NaN
1        1       2  Undirected   1    NaN
2        1       3  Undirected   2    NaN
3        2       4  Undirected   3    NaN
4        2       5  Undirected   4    NaN
5        2       6  Undi