In [6]:
# Libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, DataLoader
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T



In the context of graph neural networks (GNNs), the input data typically consists of three parts:

The graph structure: This is typically represented as an adjacency matrix or an edge list. The adjacency matrix is a square matrix where the (i,j)-th entry is 1 if there is an edge from node i to node j, and 0 otherwise. An edge list is simply a list of tuples (i,j) representing the edges in the graph.

Node features: These are the properties of each node in the graph that the model uses to learn representations. Node features can include things like text embeddings, image features, or numerical attributes. In the case of the Cora dataset, each node is represented by a 1433-dimensional binary vector indicating the presence or absence of certain keywords in a document.

Node labels: These are the ground truth labels for each node that the model tries to predict. In the case of the Cora dataset, each node is assigned one of seven possible labels indicating the topic of the corresponding document.
The data for the Cora dataset looks like this:

```python
Data(edge_index=[2, 10556], x=[2708, 1433], y=[2708])
```
Here, edge_index is the edge list representing the graph structure, x is the node feature matrix, and y is the node label vector.

In more detail, edge_index is a 2D tensor of shape (2, E) where E is the number of edges in the graph. 
The first row of edge_index contains the indices of the source nodes of each edge, while the second row contains the indices of the target nodes. 
Each row of x is the feature vector of one node, so x has shape (N, F), where N is the number of nodes in the graph and F is the number of features per node; y is a 1D tensor of length N. 
The i-th entry of y contains the label for the i-th node.

In [None]:
# Graph Convolutional Network (GCN) model
class GCN(nn.Module):
    """Two-layer graph convolutional network producing per-node log-probabilities.

    Args:
        input_dim: size of each input node feature vector.
        hidden_dim: size of the hidden representation after the first layer.
        output_dim: number of output scores per node.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Layers are created in the same order as they are applied.
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, data):
        """Run both layers on `data.x` / `data.edge_index`.

        Returns the log-softmax (over dim 1) of the second layer's output.
        """
        edges = data.edge_index
        # First layer followed by a ReLU non-linearity.
        hidden = self.conv1(data.x, edges)
        hidden = F.relu(hidden)
        # Second layer yields the raw per-node scores.
        scores = self.conv2(hidden, edges)
        return F.log_softmax(scores, dim=1)

In [None]:
# Prepare the data: a small hand-written 4-node toy graph
NUM_NODE_FEATURES = 16  # length of each (random) node feature vector
FEATURE_SEED = 0        # fixed seed so the random features are identical on every run

adjacency_matrix = [[0, 1, 1, 0], [1, 0, 1, 1], [1, 1, 0, 1], [0, 1, 1, 0]]
num_nodes = len(adjacency_matrix)

# Collect the (i, j) position of every entry equal to 1 (row-major order) and
# transpose to the (2, E) layout: row 0 holds source nodes, row 1 target nodes.
# Unlike a list comprehension, this keeps the (2, 0) shape when a non-empty
# matrix has no edges; .contiguous() undoes the strided view left by .t().
edge_index = (torch.tensor(adjacency_matrix) == 1).nonzero().t().contiguous()

# Random node features drawn from a private, seeded generator so the global
# RNG state is left untouched.
node_features = torch.randn(
    (num_nodes, NUM_NODE_FEATURES),
    generator=torch.Generator().manual_seed(FEATURE_SEED),
)
node_labels = torch.tensor([0, 1, 1, 0], dtype=torch.long)

data = Data(x=node_features, edge_index=edge_index, y=node_labels)

In [None]:
# Split the data
# To use your own arrays instead of the toy graph, create the torch geometric data object like this:
# data = Data(x=torch.tensor(node_props, dtype=torch.float),
#             edge_index=torch.tensor(adj_matrix.nonzero(), dtype=torch.long),
#             y=torch.tensor(labels, dtype=torch.long))

TRAIN_FRACTION = 0.8  # share of nodes used for training
SPLIT_SEED = 0        # fixed seed so the split is identical on every run

# `data` is ONE graph, not a dataset of samples, so torch.utils.data.random_split
# cannot be applied to it. Split its nodes instead and build one induced
# subgraph per split (edges between the two node sets are dropped).
num_train_nodes = int(TRAIN_FRACTION * data.num_nodes)
node_order = torch.randperm(data.num_nodes, generator=torch.Generator().manual_seed(SPLIT_SEED))

# A boolean mask (rather than the shuffled indices) keeps nodes in ascending order.
train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
train_mask[node_order[:num_train_nodes]] = True

train_data = data.subgraph(train_mask)
val_data = data.subgraph(~train_mask)

# Create data loaders for batch processing (each holds a single graph)
train_loader = DataLoader([train_data], batch_size=32, shuffle=True)
val_loader = DataLoader([val_data], batch_size=32, shuffle=False)


In [None]:
# Train the model: set up the device, model, optimizer and loss
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Derive the layer sizes from `data` itself instead of an undefined name /
# hard-coded constant: one input per feature column, one output per class label.
num_classes = int(data.y.max()) + 1
model = GCN(input_dim=data.x.size(1), hidden_dim=64, output_dim=num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# GCN.forward already returns log-probabilities (log_softmax), so the matching
# loss is NLLLoss; CrossEntropyLoss would apply log_softmax a second time.
criterion = nn.NLLLoss()

def train(model, loader, optimizer, criterion, device):
    """Train `model` for one pass over `loader`.

    Args:
        model: module called as `model(batch)`, returning one row of scores per label.
        loader: iterable of batches exposing `.to(device)` and `.y`.
        optimizer: optimizer updating `model`'s parameters.
        criterion: loss called as `criterion(output, batch.y)`; assumed to return
            the MEAN loss over the batch (the default reduction) - TODO confirm
            if a custom criterion is used.
        device: device the batches are moved to.

    Returns:
        (mean loss per labelled item, accuracy in [0, 1]); (0.0, 0.0) if the
        loader yields no labelled items.
    """
    model.train()   # set the model in training mode
    total_loss, total_correct, total_items = 0.0, 0, 0
    for data in loader:
        data = data.to(device)
        optimizer.zero_grad()  # zero the gradients
        output = model(data)  # forward pass
        loss = criterion(output, data.y)  # loss over every label in the batch
        loss.backward()  # backward pass
        optimizer.step()  # update the parameters

        # Normalise by the number of labels actually scored, not by the number
        # of graphs in the dataset: for node-level labels one graph carries
        # many labels, so dividing by len(loader.dataset) yields "accuracy" > 1.
        num_items = data.y.size(0)
        total_loss += loss.item() * num_items  # undo the per-batch mean
        total_correct += (output.argmax(dim=1) == data.y).sum().item()
        total_items += num_items

    denominator = max(total_items, 1)  # avoid ZeroDivisionError on an empty loader
    return total_loss / denominator, total_correct / denominator

def validate(model, loader, criterion, device):
    """Evaluate `model` on `loader` without updating any parameters.

    Args:
        model: module called as `model(batch)`, returning one row of scores per label.
        loader: iterable of batches exposing `.to(device)` and `.y`.
        criterion: loss called as `criterion(output, batch.y)`; assumed to return
            the MEAN loss over the batch (the default reduction) - TODO confirm
            if a custom criterion is used.
        device: device the batches are moved to.

    Returns:
        (mean loss per labelled item, accuracy in [0, 1]); (0.0, 0.0) if the
        loader yields no labelled items.
    """
    model.eval() # set the model in evaluation mode
    total_loss, total_correct, total_items = 0.0, 0, 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for data in loader:
            data = data.to(device)
            output = model(data)  # forward pass
            loss = criterion(output, data.y)

            num_items = data.y.size(0)
            total_loss += loss.item() * num_items  # undo the per-batch mean
            total_correct += (output.argmax(dim=1) == data.y).sum().item() # number of correct predictions
            total_items += num_items

    # Previously this returned `total_loss/len` (division by the builtin `len`),
    # which raises TypeError and never reported the accuracy.
    denominator = max(total_items, 1)  # avoid ZeroDivisionError on an empty loader
    return total_loss / denominator, total_correct / denominator

In [None]:
NUM_EPOCHS = 200  # number of full passes over the training loader
LOG_EVERY = 20    # print metrics every this many epochs

for epoch in range(NUM_EPOCHS):
    # train the model for one epoch
    train_loss, train_acc = train(model, train_loader, optimizer, criterion, device)
    # evaluate the model on the validation set
    val_loss, val_acc = validate(model, val_loader, criterion, device)

    if (epoch + 1) % LOG_EVERY == 0:
        print(f"epoch {epoch + 1:3d} | "
              f"train loss {train_loss:.4f} acc {train_acc:.2f} | "
              f"val loss {val_loss:.4f} acc {val_acc:.2f}")