# Using CSBM-generated data to train a model

In [1]:
import torch
import matplotlib.pyplot as plt
from csbms import MultiClassCSBM, FeatureCSBM, StructureCSBM

In [2]:
# Experiment-wide sizes shared by the data generator and the GCN:
#   n -> passed as `n` to the CSBM generator (presumably the number of nodes)
#   d -> passed as `dimensions` to the generator; also the GCN input width
#   c -> passed as `classes` to the generator; also the GCN output width
n, d, c = 1600, 128, 4

In [3]:
# Total number of tasks (graph snapshots) to generate, including the initial one.
num_tasks = 10

csbm = FeatureCSBM(n=n, dimensions=d, classes=c)

# The first task is the generator's initial graph; every further task is the
# graph exposed by the generator after one more evolve() step.
# NOTE(review): this assumes evolve() rebinds csbm.data to a new object rather
# than mutating the previous one in place; otherwise all entries of data_list
# would alias the same graph -- confirm against the csbms implementation.
data_list = [csbm.data]
for _ in range(num_tasks - 1):
    csbm.evolve()
    data_list.append(csbm.data)

In [4]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    The network applies ``GCNConv -> ReLU -> dropout -> GCNConv`` and returns
    per-node log-probabilities (``log_softmax`` over dimension 1).

    Args:
        in_channels: Width of the input node features. Defaults to the
            notebook-level ``d`` when omitted.
        hidden_channels: Width of the hidden representation (default 16).
        out_channels: Number of output classes. Defaults to the
            notebook-level ``c`` when omitted.
        dropout: Dropout probability applied after the first layer while the
            module is in training mode (default 0.5, which is also the
            default of ``F.dropout``).
    """

    def __init__(self, in_channels=None, hidden_channels=16,
                 out_channels=None, dropout=0.5):
        super().__init__()
        # Resolve the notebook globals at call time so that ``GCN()`` keeps
        # working exactly as before while explicit sizes remain possible.
        if in_channels is None:
            in_channels = d
        if out_channels is None:
            out_channels = c
        self.dropout = dropout
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Return log-probabilities for every node of ``data``.

        ``data`` must expose ``x`` (node features) and ``edge_index``.
        """
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

## Train the same model sequentially on each task

In [5]:
# Use the GPU when one is available; the model and every graph are moved there.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# One model/optimizer pair is created here and reused for every task, so each
# task continues training from the weights left by the previous task.
model = GCN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

print('\n' + 10 * '-' + ' Training and evaluating the model on each task ' + 10 * '-')
for task, data in enumerate(data_list):
    data = data.to(device)

    # The training mask and its labels are identical for all epochs of a task,
    # so index them once instead of on every iteration.
    train_mask = data.train_mask
    train_labels = data.y[train_mask]

    model.train()
    for epoch in range(200):
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out[train_mask], train_labels)
        loss.backward()
        optimizer.step()

    # Report accuracy on this task's test split right after training on it.
    model.eval()
    with torch.no_grad():  # inference only: skip building the autograd graph
        pred = model(data).argmax(dim=1)
    test_mask = data.test_mask
    correct = (pred[test_mask] == data.y[test_mask]).sum()
    acc = int(correct) / int(test_mask.sum())
    print(f'Task {task+1:02d}, Accuracy: {acc:.4f}')

cuda

---------- Training and evaluating the model on each task ----------
Task 01, Accuracy: 0.6648
Task 02, Accuracy: 0.6562
Task 03, Accuracy: 0.6602
Task 04, Accuracy: 0.6461
Task 05, Accuracy: 0.6367
Task 06, Accuracy: 0.6602
Task 07, Accuracy: 0.6625
Task 08, Accuracy: 0.6727
Task 09, Accuracy: 0.6672
Task 10, Accuracy: 0.7469


In [6]:
# Re-evaluate the model in its current state (after the last training cell)
# on the test split of every task.
print('\n' + 10 * '-' + ' Evaluation after training the model on all tasks ' + 10 * '-')
model.eval()
with torch.no_grad():  # inference only: skip building the autograd graph
    for task, data in enumerate(data_list):
        data = data.to(device)
        pred = model(data).argmax(dim=1)
        test_mask = data.test_mask
        correct = (pred[test_mask] == data.y[test_mask]).sum()
        acc = int(correct) / int(test_mask.sum())
        print(f'Task {task+1:02d}, Accuracy: {acc:.4f}')


---------- Evaluation after training the model on all tasks ----------
Task 01, Accuracy: 0.2094
Task 02, Accuracy: 0.2789
Task 03, Accuracy: 0.3312
Task 04, Accuracy: 0.3656
Task 05, Accuracy: 0.4602
Task 06, Accuracy: 0.5781
Task 07, Accuracy: 0.6367
Task 08, Accuracy: 0.6867
Task 09, Accuracy: 0.7188
Task 10, Accuracy: 0.7469


## Train model on T1 and evaluate on all tasks

In [7]:
# Use the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Start from a freshly initialised model and optimizer.
model = GCN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Train on the first task only.
data = data_list[0].to(device)

# The training mask and its labels are the same for all epochs, so index them
# once instead of on every iteration.
train_mask = data.train_mask
train_labels = data.y[train_mask]

model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[train_mask], train_labels)
    loss.backward()
    optimizer.step()

cuda


In [8]:
# Evaluate the current model on the test split of every task.
model.eval()
with torch.no_grad():  # inference only: skip building the autograd graph
    for task, data in enumerate(data_list):
        data = data.to(device)
        pred = model(data).argmax(dim=1)
        test_mask = data.test_mask
        correct = (pred[test_mask] == data.y[test_mask]).sum()
        acc = int(correct) / int(test_mask.sum())
        print(f'Task {task+1:02d}, Accuracy: {acc:.4f}')

Task 01, Accuracy: 0.6555
Task 02, Accuracy: 0.5555
Task 03, Accuracy: 0.5180
Task 04, Accuracy: 0.5383
Task 05, Accuracy: 0.4945
Task 06, Accuracy: 0.4406
Task 07, Accuracy: 0.3781
Task 08, Accuracy: 0.3109
Task 09, Accuracy: 0.2750
Task 10, Accuracy: 0.2141
