# Using CSBM-generated data to train a model

In [1]:
import torch
import matplotlib.pyplot as plt
import numpy as np
import math
from MultiClassCSBM import MultiClassCSBM

In [2]:
# Notebook-wide problem configuration.
#   n -> forwarded as `n` to MultiClassCSBM (presumably the node count -- confirm)
#   d -> forwarded as `dimensions` to MultiClassCSBM; also the GCN input width
#   c -> forwarded as `classes` to MultiClassCSBM; also the GCN output width
n, d, c = 5000, 100, 20

In [3]:
# Instantiate the CSBM generator from the notebook-level configuration.
csbm = MultiClassCSBM(classes=c, dimensions=d, n=n)

# Gather ten graphs: record the generator's current `data`, then call
# `evolve()` before the next one is recorded.
# NOTE(review): this assumes `evolve()` does not mutate the previously
# returned `csbm.data` object in place -- otherwise every list entry would
# alias the same graph. Confirm against MultiClassCSBM.
data_list = list()
for _ in range(10):
    data_list += [csbm.data]
    csbm.evolve()

[Data(x=[100, 25], edge_index=[2, 1782], y=[100], train_mask=[100], test_mask=[100]), Data(x=[200, 25], edge_index=[2, 5345], y=[200], train_mask=[200], test_mask=[200]), Data(x=[300, 25], edge_index=[2, 10736], y=[300], train_mask=[300], test_mask=[300]), Data(x=[400, 25], edge_index=[2, 17857], y=[400], train_mask=[400], test_mask=[400]), Data(x=[500, 25], edge_index=[2, 26884], y=[500], train_mask=[500], test_mask=[500]), Data(x=[600, 25], edge_index=[2, 37560], y=[600], train_mask=[600], test_mask=[600]), Data(x=[700, 25], edge_index=[2, 50165], y=[700], train_mask=[700], test_mask=[700]), Data(x=[800, 25], edge_index=[2, 64456], y=[800], train_mask=[800], test_mask=[800]), Data(x=[900, 25], edge_index=[2, 80724], y=[900], train_mask=[900], test_mask=[900]), Data(x=[1000, 25], edge_index=[2, 98594], y=[1000], train_mask=[1000], test_mask=[1000])]


In [4]:
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

# Wrap the collected graphs in a PyG loader. Up to 32 graphs are collated per
# mini-batch, so with the ten graphs gathered above each pass yields one batch.
dataloader = DataLoader(dataset=data_list, batch_size=32)

In [5]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    The network applies ``GCNConv -> ReLU -> dropout -> GCNConv`` and returns
    per-node log-probabilities (``log_softmax`` over dimension 1).

    Args:
        in_channels: Width of the input node features. When ``None`` the
            notebook-level ``d`` is used, so ``GCN()`` keeps working.
        hidden_channels: Width of the hidden representation (default 16).
        out_channels: Number of output classes. When ``None`` the
            notebook-level ``c`` is used.
        dropout: Dropout probability applied after the first layer. The
            default 0.5 matches the default of ``F.dropout``.
    """

    def __init__(self, in_channels=None, hidden_channels=16, out_channels=None, dropout=0.5):
        super().__init__()
        # Resolve the globals at construction time (not at class-definition
        # time) so changing `d` / `c` before instantiating is still honoured.
        if in_channels is None:
            in_channels = d
        if out_channels is None:
            out_channels = c

        self.dropout = dropout
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Return per-node log-probabilities for ``data``.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``
                (graph connectivity), both forwarded to the ``GCNConv`` layers.

        Returns:
            The output of ``F.log_softmax(..., dim=1)`` on the second layer's
            activations.
        """
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [6]:
# Training setup: run on the GPU when one is available, otherwise on the CPU.
num_epochs = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

for epoch in range(num_epochs):
    model.train()
    # Accumulate the loss over all mini-batches so the reported number
    # describes the whole epoch rather than only the last batch.
    epoch_loss = 0.0
    num_batches = 0
    for batch in dataloader:
        data = batch.to(device)
        optimizer.zero_grad()
        out = model(data)
        # Only nodes selected by `train_mask` contribute to the loss.
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        num_batches += 1
    # Guard against an empty loader, which previously left `loss` unbound.
    mean_loss = epoch_loss / num_batches if num_batches else float('nan')
    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {mean_loss}')

Epoch 1/10, Loss: 1.5782885551452637
Epoch 2/10, Loss: 1.5589663982391357
Epoch 3/10, Loss: 1.5330696105957031
Epoch 4/10, Loss: 1.506467342376709
Epoch 5/10, Loss: 1.480781078338623
Epoch 6/10, Loss: 1.4525907039642334
Epoch 7/10, Loss: 1.4323838949203491
Epoch 8/10, Loss: 1.4055958986282349
Epoch 9/10, Loss: 1.3797435760498047
Epoch 10/10, Loss: 1.3514959812164307


In [7]:
# Evaluate on the nodes selected by `test_mask` across every batch of the
# loader, instead of relying on whatever `data` the training loop left behind.
model.eval()
correct = 0
total = 0
with torch.no_grad():  # inference only: skip building the autograd graph
    for eval_batch in dataloader:
        eval_batch = eval_batch.to(device)
        pred = model(eval_batch).argmax(dim=1)
        correct += int((pred[eval_batch.test_mask] == eval_batch.y[eval_batch.test_mask]).sum())
        total += int(eval_batch.test_mask.sum())
# Avoid ZeroDivisionError when no node is marked as a test node.
acc = correct / total if total else float('nan')
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.6923
