In [1]:
import torch
from torch_geometric.data import Data

# Connectivity as a [2, num_edges] tensor: one column per directed edge.
# Every edge appears in both directions (0<->1 and 1<->2).
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
# One scalar feature for each of the three nodes.
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

data = Data(x=x, edge_index=edge_index)
# Echo the object built above. The previous last line constructed a second,
# unrelated Data object out of plain Python lists and displayed that instead.
data

Data(x=[3, 1], edge_index=[2, 4])

In [2]:
import torch
from torch_geometric.data import Data

# Same graph as before, but written as a list of (first, second) index pairs,
# i.e. a [num_edges, 2] tensor.
edge_index = torch.tensor([[0, 1],
                           [1, 0],
                           [1, 2],
                           [2, 1]], dtype=torch.long)
# One scalar feature for each of the three nodes.
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

# Transpose to the [2, num_edges] layout and make the memory contiguous
# before handing the tensor to Data.
data = Data(x=x, edge_index=edge_index.t().contiguous())
# Echo the object built above. The previous last line constructed a second,
# unrelated Data object out of plain Python lists and displayed that instead.
data

Data(x=[3, 1], edge_index=[2, 4])

In [3]:
# Sanity-check the Data object built above. raise_on_error=True asks for
# problems to be raised as exceptions (see the PyG Data.validate docs);
# the call returned True for this graph.
data.validate(raise_on_error=True)

True

In [5]:
# Names of the attributes stored on the graph.
print(data.keys())


# Dictionary-style access to the node feature matrix.
print(data['x'])


# Iterating over the object yields (key, value) pairs.
for key, item in data:
    print(f'{key} found in data')


# Membership test: no edge attributes were attached to this graph.
print('edge_attr' in data)


# Graph statistics and structural checks, printed one per line:
# node count, edge count, feature width, isolated nodes, self-loops, directedness.
for summary in (data.num_nodes,
                data.num_edges,
                data.num_node_features,
                data.has_isolated_nodes(),
                data.has_self_loops(),
                data.is_directed()):
    print(summary)


# Transfer data object to GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)

['edge_index', 'x']
tensor([[-1.],
        [ 0.],
        [ 1.]])
x found in data
edge_index found in data
False
3
4
1
False
False
False


In [8]:
from torch_geometric.datasets import TUDataset

# Load the ENZYMES graph-classification dataset, stored under /tmp/ENZYMES.
dataset = TUDataset(name='ENZYMES', root='/tmp/ENZYMES')

# Summary, one value per line: dataset repr, number of graphs,
# number of classes, number of node features.
print(
    dataset,
    len(dataset),
    dataset.num_classes,
    dataset.num_node_features,
    sep='\n',
)


ENZYMES(600)
600
6
3


In [9]:
# First graph of the dataset.
data = dataset[0]
# Print the graph itself. The previous code printed a hand-built Data object
# made of plain lists, which is why the output showed list lengths.
print(data)

print(data.is_undirected())


Data(edge_index=[2, 168], x=[37, 3], y=[1])
True


In [10]:
# Slice the dataset: the first 540 graphs for training, the rest for testing.
train_dataset, test_dataset = dataset[:540], dataset[540:]

print(train_dataset, test_dataset, sep='\n')

ENZYMES(540)
ENZYMES(60)


In [11]:
# Rebind `dataset` to a shuffled version of itself. No seed is set in this
# notebook, so the resulting order differs from run to run.
dataset = dataset.shuffle()
print(dataset)

ENZYMES(600)


In [12]:
# Manual shuffle: draw a random permutation of all indices and index the
# dataset with it. The printed size stays at 600 graphs.
perm = torch.randperm(len(dataset))
dataset = dataset[perm]
print(dataset)

ENZYMES(600)


In [13]:
from torch_geometric.datasets import Planetoid

# Load Cora from the Planetoid collection; the raw files are downloaded and
# processed into /tmp/Cora when they are not there yet (see the log below).
dataset = Planetoid(name='Cora', root='/tmp/Cora')

# Summary, one value per line: dataset repr, number of graphs,
# number of classes, number of node features.
for summary in (dataset,
                len(dataset),
                dataset.num_classes,
                dataset.num_node_features):
    print(summary)


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...


Cora()
1
7
1433


Done!


In [14]:
# Cora consists of a single graph, so index 0 is the whole dataset.
data = dataset[0]
# Expected structure, carried over from the original cell as an annotation.
# It used to be an executable expression that built and discarded a throwaway
# Data object made of plain lists; it was never an actual output.
# Data(edge_index=[2, 10556], test_mask=[2708],
#      train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])

print(data.is_undirected())


# Number of nodes selected by each of the three split masks.
print(data.train_mask.sum().item())


print(data.val_mask.sum().item())


print(data.test_mask.sum().item())


True
140
500
1000


In [16]:
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader

# Reload ENZYMES with use_node_attr=True: the batches below carry 21 feature
# columns per node instead of the 3 reported without the flag.
dataset = TUDataset(name='ENZYMES', root='/tmp/ENZYMES', use_node_attr=True)
# Mini-batches of 32 graphs, reshuffled on every pass over the loader.
loader = DataLoader(dataset, shuffle=True, batch_size=32)

for batch in loader:
    # The batch repr first, then the number of graphs it holds.
    print(batch, batch.num_graphs, sep='\n')
    

DataBatch(edge_index=[2, 4006], x=[1058, 21], y=[32], batch=[1058], ptr=[33])
32
DataBatch(edge_index=[2, 3398], x=[1025, 21], y=[32], batch=[1025], ptr=[33])
32
DataBatch(edge_index=[2, 4666], x=[1301, 21], y=[32], batch=[1301], ptr=[33])
32
DataBatch(edge_index=[2, 3424], x=[884, 21], y=[32], batch=[884], ptr=[33])
32
DataBatch(edge_index=[2, 4120], x=[1050, 21], y=[32], batch=[1050], ptr=[33])
32
DataBatch(edge_index=[2, 3760], x=[991, 21], y=[32], batch=[991], ptr=[33])
32
DataBatch(edge_index=[2, 3828], x=[985, 21], y=[32], batch=[985], ptr=[33])
32
DataBatch(edge_index=[2, 4072], x=[1054, 21], y=[32], batch=[1054], ptr=[33])
32
DataBatch(edge_index=[2, 4054], x=[1060, 21], y=[32], batch=[1060], ptr=[33])
32
DataBatch(edge_index=[2, 4042], x=[1032, 21], y=[32], batch=[1032], ptr=[33])
32
DataBatch(edge_index=[2, 3654], x=[942, 21], y=[32], batch=[942], ptr=[33])
32
DataBatch(edge_index=[2, 4460], x=[1139, 21], y=[32], batch=[1139], ptr=[33])
32
DataBatch(edge_index=[2, 3876], x=[9

In [17]:
from torch_geometric.utils import scatter
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader

# ENZYMES with the additional node attributes, in shuffled mini-batches of 32.
dataset = TUDataset(name='ENZYMES', root='/tmp/ENZYMES', use_node_attr=True)
loader = DataLoader(dataset, shuffle=True, batch_size=32)

for data in loader:
    # The batch repr first, then the number of graphs it holds.
    print(data, data.num_graphs, sep='\n')

    # Average the node features along dim 0, grouped by `data.batch`;
    # the printed size has one row per graph in the batch.
    x = scatter(data.x, data.batch, dim=0, reduce='mean')
    print(x.size())


DataBatch(edge_index=[2, 3880], x=[1093, 21], y=[32], batch=[1093], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4462], x=[1260, 21], y=[32], batch=[1260], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4180], x=[1108, 21], y=[32], batch=[1108], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 3756], x=[998, 21], y=[32], batch=[998], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 3648], x=[971, 21], y=[32], batch=[971], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4244], x=[1112, 21], y=[32], batch=[1112], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 3796], x=[988, 21], y=[32], batch=[988], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4158], x=[1107, 21], y=[32], batch=[1107], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 3422], x=[899, 21], y=[32], batch=[899], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4306], x=[1183, 21], y=[32], batch=[1183], ptr=[33])
32
torch.Siz

In [22]:
from torch_geometric.datasets import Planetoid

# Rebind `dataset` to Cora: the DataLoader cells above had pointed it at
# ENZYMES, and the model cells below read their sizes from this name.
dataset = Planetoid(root='/tmp/Cora', name='Cora')
dataset

Cora()

In [23]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network producing per-node log-probabilities.

    Args:
        in_channels: Size of each input node feature vector. When ``None``
            (the default) it is read from the notebook-level ``dataset`` as
            ``dataset.num_node_features``.
        hidden_channels: Width of the hidden layer. Defaults to 16.
        out_channels: Number of output classes. When ``None`` (the default)
            it is read from the notebook-level ``dataset`` as
            ``dataset.num_classes``.

    Calling ``GCN()`` with no arguments builds the same layers as before;
    passing the sizes explicitly removes the dependency on the global
    ``dataset``.
    """

    def __init__(self, in_channels=None, hidden_channels=16, out_channels=None):
        super().__init__()
        # Fall back to the notebook-level dataset only for sizes not given.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if out_channels is None:
            out_channels = dataset.num_classes

        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Run both convolutions on ``data.x`` / ``data.edge_index``.

        Returns:
            The log-softmax over dim 1 of the second layer's output.
        """
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is applied only while the module is in training mode.
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [24]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Model and graph must live on the same device.
model = GCN().to(device)
data = dataset[0].to(device)
# Adam with learning rate 0.01 and weight decay 5e-4.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Training mode enables the dropout inside GCN.forward.
model.train()
# 200 epochs; each epoch is one forward/backward pass over the whole graph.
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    # The model outputs log-probabilities, hence nll_loss; only the nodes
    # selected by train_mask contribute to the loss.
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

In [25]:
# Evaluation mode turns off the dropout inside GCN.forward.
model.eval()
# Inference only: skip autograd bookkeeping. The predictions are unchanged.
with torch.no_grad():
    # Predicted class per node = index of the largest log-probability.
    pred = model(data).argmax(dim=1)
# Accuracy over the nodes selected by test_mask.
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.8210
