## Load dataset

The Twitch dataset we will use is available in the `torch_geometric` package.

In [1]:
import torch
from torch_geometric.datasets import Twitch

# Load the 'DE' variant of the Twitch dataset, using ./data/Twitch as its root folder
dataset = Twitch(
    root='./data/Twitch',
    name='DE',
)

  if osp.exists(f) and torch.load(f) != _repr(self.pre_transform):
  if osp.exists(f) and torch.load(f) != _repr(self.pre_filter):
  return torch.load(f, map_location)


### Show statistics of the network

In [2]:
# summary statistics of the dataset: number of nodes and edges per graph
dataset.print_summary()

Twitch (#graphs=1):
+------------+----------+----------+
|            |   #nodes |   #edges |
|------------+----------+----------|
| mean       |     9498 |   315774 |
| std        |      nan |      nan |
| min        |     9498 |   315774 |
| quantile25 |     9498 |   315774 |
| median     |     9498 |   315774 |
| quantile75 |     9498 |   315774 |
| max        |     9498 |   315774 |
+------------+----------+----------+


  std=data.std().item(),


In [3]:
# number of node classes (the labels the model will learn to predict)
print(dataset.num_classes)

2


In [4]:
# number of features attached to each node
print(dataset.num_node_features)

128


In [5]:
# number of nodes: length of the first dimension of the node-feature matrix x
# (assumes x holds one row per node)
N = len(dataset[0].x)

### Split training and test data

In [6]:
# param: fraction of the nodes assigned to the training set (the rest form the test set)
train_size = .3
# param: seed of the split, so that every run produces the same train/test partition
split_seed = 42

# Generate a reproducible random permutation of node indices.
# A dedicated generator is used instead of the global RNG so that seeding the
# split does not affect any other random operation in the notebook.
split_rng = torch.Generator().manual_seed(split_seed)
perm = torch.randperm(N, generator=split_rng)

# The first `num_train` shuffled indices go to training, the remainder to testing
num_train = int(train_size * N)
train_idx = perm[:num_train]
test_idx = perm[num_train:]

# Initialize train_mask and test_mask with False
train_mask = torch.zeros(N, dtype=torch.bool)
test_mask = torch.zeros(N, dtype=torch.bool)

# Set the selected indices to True
train_mask[train_idx] = True
test_mask[test_idx] = True

# Sanity check: the two masks are disjoint and together cover every node
assert not (train_mask & test_mask).any(), "train and test sets overlap"
assert (train_mask | test_mask).all(), "some nodes are in neither set"

## Define and train a simple Graph Convolutional Network model

In [7]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Two-layer Graph Convolutional Network for node classification.

    Args:
        in_channels: Size of the input node features. When ``None``, it is
            read from ``dataset.num_node_features`` of the notebook-level
            ``dataset``.
        hidden_channels: Output size of the first convolution (default 32).
        out_channels: Output size of the second convolution, i.e. the number
            of classes. When ``None``, it is read from ``dataset.num_classes``.
        dropout: Dropout probability applied after the first layer while the
            module is in training mode (default 0.5).
    """

    def __init__(self, in_channels=None, hidden_channels=32, out_channels=None, dropout=0.5):
        super().__init__()
        # Fall back to the notebook-level dataset so that a bare `GCN()` keeps working
        if in_channels is None:
            in_channels = dataset.num_node_features
        if out_channels is None:
            out_channels = dataset.num_classes

        self.dropout = dropout
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Compute log-probabilities for the nodes of a graph.

        Args:
            data: Graph object providing ``x`` and ``edge_index`` attributes.

        Returns:
            The log-softmax, taken over dimension 1, of the second
            convolution's output.
        """
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is only active in training mode (self.training is True)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [8]:
# Choose the compute device in order of preference: MPS, then CUDA, then CPU
device = torch.device(
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

In [9]:
# Place the model, the graph and both split masks on the selected device
model = GCN().to(device)
data = dataset[0].to(device)
train_mask, test_mask = (mask.to(device) for mask in (train_mask, test_mask))

In [10]:
# Adam optimizer over all model parameters, with weight decay
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Training mode keeps the dropout inside the model active
model.train()
for epoch in range(200):
    optimizer.zero_grad()

    # Full forward pass over the graph; the loss only uses the training nodes
    out = model(data)
    loss = F.nll_loss(out[train_mask], data.y[train_mask])

    # Backpropagate and update the weights
    loss.backward()
    optimizer.step()

    # Report every 10th epoch; `loss` still holds the value computed before the update
    if not epoch % 10:
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

Epoch: 000, Loss: 0.6719
Epoch: 010, Loss: 0.5876
Epoch: 020, Loss: 0.5621
Epoch: 030, Loss: 0.5486
Epoch: 040, Loss: 0.5412
Epoch: 050, Loss: 0.5266
Epoch: 060, Loss: 0.5193
Epoch: 070, Loss: 0.5083
Epoch: 080, Loss: 0.5055
Epoch: 090, Loss: 0.5005
Epoch: 100, Loss: 0.4817
Epoch: 110, Loss: 0.4821
Epoch: 120, Loss: 0.4803
Epoch: 130, Loss: 0.4735
Epoch: 140, Loss: 0.4702
Epoch: 150, Loss: 0.4660
Epoch: 160, Loss: 0.4642
Epoch: 170, Loss: 0.4576
Epoch: 180, Loss: 0.4550
Epoch: 190, Loss: 0.4579


## Evaluation

To evaluate the Graph Convolutional Network model we just trained, we use it to predict the labels of the nodes in the test set and compare the predictions with the ground truth. We report accuracy as the evaluation metric.

In [11]:
# Switch to evaluation mode so that the dropout inside the model is disabled
model.eval()

# Inference needs no gradients: no_grad avoids building the autograd graph
with torch.no_grad():
    pred = model(data).argmax(dim=1)

# Accuracy = correctly classified test nodes / total number of test nodes
correct = (pred[test_mask] == data.y[test_mask]).sum()
acc = int(correct) / int(test_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.6760
