In [1]:
import pandas as pd
import torch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import torch.nn.functional as F

# Load nodes and edges from CSV files
nodes_df = pd.read_csv('FortinetNodes2.graphml.csv')
edges_df = pd.read_csv('FortinetEdges2.graphml.csv')

# Display the nodes and edges DataFrames
print("Nodes DataFrame:")
print(nodes_df.head())

print("Edges DataFrame:")
print(edges_df.head())

# Create feature tensor from node features
# Assuming 'Weight' is a feature we want to use, you can add more features if available
x = torch.tensor(nodes_df[['Weight']].values, dtype=torch.float32)

# Create edge index tensor
edge_index = torch.tensor(edges_df[['Source', 'Target']].values.T, dtype=torch.long)

# Create labels tensor
# Assuming 'Label' column contains the node labels
labels = pd.factorize(nodes_df['Label'])[0]
labels = torch.tensor(labels, dtype=torch.long)

# Create PyTorch Geometric Data object
data = Data(x=x, edge_index=edge_index, y=labels)

# Display the Data object
print(data)

class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)
        
    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

# Initialize model, optimizer, and loss function
model = GCN(in_channels=x.shape[1], hidden_channels=4, out_channels=len(set(labels.tolist())))
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

n_epochs = 100
for epoch in range(n_epochs):
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = criterion(out, data.y)
    loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

model.eval()
with torch.no_grad():
    out = model(data)
    predicted_labels = out.argmax(dim=1)

true_labels = data.y
correct_predictions = (predicted_labels == true_labels).sum().item()
total_predictions = len(true_labels)
accuracy = (correct_predictions / total_predictions) * 100
print(f"Accuracy: {accuracy:.2f}%")


Nodes DataFrame:
   Source  Target        Type  Id  Label  Weight
0       0       3  Undirected   0      0       1
1       1       3  Undirected   1      1       1
2       2       3  Undirected   2      2       1
3       4       3  Undirected   3      3       1
4       4       5  Undirected   4      4       1
Edges DataFrame:
   Source  Target        Type  Id  Label  Weight
0       0       3  Undirected   0      0       1
1       1       3  Undirected   1      1       1
2       2       3  Undirected   2      2       1
3       4       3  Undirected   3      3       1
4       4       5  Undirected   4      4       1
Data(x=[6, 1], edge_index=[2, 6], y=[6])
Epoch 0, Loss: 1.910666584968567
Epoch 10, Loss: 1.7812498807907104
Epoch 20, Loss: 1.7417258024215698
Epoch 30, Loss: 1.723252296447754
Epoch 40, Loss: 1.7024868726730347
Epoch 50, Loss: 1.6821836233139038
Epoch 60, Loss: 1.6624704599380493
Epoch 70, Loss: 1.6430295705795288
Epoch 80, Loss: 1.624009132385254
Epoch 90, Loss: 1.60546350