In [2]:
import torch
from torch_geometric.data import Data

In [5]:
## 1 Data is per graph
## node features 
## edge index (adjacency matrix) (coo format = coordiate format)
## edge features
## node pos (geometric positions)
## label (node level - [node, labels], graph level [1, labels]

In [33]:
node_features = torch.tensor([[-1], [1], [2]], dtype=torch.long)
edge_index = torch.tensor([[0,1,1,2,0,2,0],
              [1,0,2,1,2,0,0]])

data = Data(x=node_features, edge_index=edge_index)

In [34]:
data.num_features
data.is_undirected()

True

In [35]:
data.has_isolated_nodes()

False

In [36]:
data.num_edges

7

In [37]:
data.has_self_loops()

True

In [40]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print("device :", device)

device : cuda


In [41]:
data.to(device)

Data(x=[3, 1], edge_index=[2, 7])

In [42]:
data.to("cpu")

Data(x=[3, 1], edge_index=[2, 7])

In [43]:
from torch_geometric.datasets import TUDataset

In [44]:
tu_dataset = TUDataset("../datasets/ENZYMES", name="ENZYMES") 

Downloading https://www.chrsmrrs.com/graphkerneldatasets/ENZYMES.zip
Extracting ../datasets/ENZYMES/ENZYMES/ENZYMES.zip
Processing...
Done!


In [45]:
tu_dataset.len()

600

In [47]:
tu_dataset.num_classes # graph level?

6

In [None]:
tu_dataset.num_node_labels

In [50]:
single_data= tu_dataset[0]

In [53]:
single_data

Data(edge_index=[2, 168], x=[37, 3], y=[1])

In [54]:
single_data.num_edges

168

In [55]:
single_data.num_nodes

37

In [56]:
#Shuffling
tu_dataset = tu_dataset.shuffle()

In [57]:
tu_train_dataset = tu_dataset[:540]
tu_test_dataset = tu_dataset[540:]

In [62]:
from torch_geometric.datasets import Planetoid

In [64]:
cora_dataset = Planetoid("../datasets/Cora/", name='Cora')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [66]:
cora_dataset.len() # 1 graph

1

In [69]:
cora_dataset

Cora()

In [67]:
cora_data = cora_dataset[0]

In [70]:
cora_data

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [None]:
2708 nodes
1433 features
10556 edges
2708 node labels


In [76]:
cora_data['train_mask'].sum().item()

140

In [77]:
cora_data['val_mask'].sum().item()

500

In [78]:
cora_data['test_mask'].sum().item()

1000

In [81]:
from torch_scatter import scatter
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader

In [91]:
tu_dataset = TUDataset("../datasets/ENZYMES", name="ENZYMES", use_node_attr=True)
tu_dataloader = DataLoader(tu_dataset, batch_size=32, shuffle=True)



In [92]:
for batch in tu_dataloader:
    print(batch)
    print(batch.num_graphs)
    x = scatter(batch.x, batch.batch, dim=0, reduce='mean')
    print(x.size())

DataBatch(edge_index=[2, 3816], x=[989, 21], y=[32], batch=[989], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4126], x=[1119, 21], y=[32], batch=[1119], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 3436], x=[996, 21], y=[32], batch=[996], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4404], x=[1201, 21], y=[32], batch=[1201], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4186], x=[1067, 21], y=[32], batch=[1067], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4010], x=[1099, 21], y=[32], batch=[1099], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 3772], x=[961, 21], y=[32], batch=[961], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4018], x=[1065, 21], y=[32], batch=[1065], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4578], x=[1186, 21], y=[32], batch=[1186], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4042], x=[1048, 21], y=[32], batch=[1048], ptr=[33])
32
torch.S

## Dataset transformation

In [None]:
from torch_geometric.datasets import ShapeNet
import torch_geometric.transforms as T

In [105]:
shape_net_dataset = ShapeNet("../datasets/ShapeNet", categories=['Airplane'])
shape_net_dataset[0]

Processing...
Done!


Data(x=[2518, 3], y=[2518], pos=[2518, 3], category=[1])

In [104]:
shape_net_dataset_2 = ShapeNet("../datasets/ShapeNet", categories=['Airplane'],
                            pre_transform=T.KNNGraph(6))
shape_net_dataset_2[0]

Data(x=[2518, 3], y=[2518], pos=[2518, 3], category=[1], edge_index=[2, 15108])

In [106]:
#loading the dataset
#Create the model
#Training loop
#Evaluate

In [127]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
import torch.nn as nn
from torch.optim import Adam

In [122]:
cora_dataset = Planetoid("../datasets/Cora/", name="Cora")
print("Num graphs:", len(cora_dataset))
print("Num Nodes:", cora_dataset[0].num_nodes)
print("Num Nodes features:", cora_dataset.num_node_features)
print("Num edges:", cora_dataset[0].num_edges)
print("Num classes:", cora_dataset.num_classes)
print("Keys:", cora_dataset[0].keys)


Num graphs: 1
Num Nodes: 2708
Num Nodes features: 1433
Num edges: 10556
Num classes: 7
Keys: ['val_mask', 'test_mask', 'train_mask', 'x', 'y', 'edge_index']


In [135]:
def get_device():
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

class CustomGCN(nn.Module):
    def __init__(self, dataset, hidden_layer=16):
        super().__init__()
        self.num_node_features = dataset.num_node_features
        self.hidden_layer = hidden_layer
        self.num_classes = dataset.num_classes
        self.conv1 = GCNConv(self.num_node_features, hidden_layer)
        self.conv2 = GCNConv(hidden_layer, self.num_classes)
        
    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index=edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index=edge_index)
        return F.log_softmax(x, dim=1)
    

In [116]:
device = get_device()
print(f"Device: {device}")

Device: cuda


In [143]:
model = CustomGCN(cora_dataset)

In [156]:
def train_single_loop(data, model, optimizer):
    model = model.train()
    optimizer.zero_grad()
    output = model(data)
    loss = F.nll_loss(output[data.train_mask], data.y[data.train_mask])
    print("loss: ", loss)
    loss.backward()
    optimizer.step()

In [157]:
optimizer = Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

In [158]:
model = model.to(device)
data = cora_dataset[0].to(device)

In [159]:
num_epoch = 200
for epoch in range(num_epoch):
    print(f"Epoch {epoch}")
    train_single_loop(data, model, optimizer)

Epoch 0
loss:  tensor(0.0399, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch 1
loss:  tensor(0.0437, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch 2
loss:  tensor(0.0241, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch 3
loss:  tensor(0.0383, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch 4
loss:  tensor(0.0307, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch 5
loss:  tensor(0.0312, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch 6
loss:  tensor(0.0241, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch 7
loss:  tensor(0.0247, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch 8
loss:  tensor(0.0302, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch 9
loss:  tensor(0.0397, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch 10
loss:  tensor(0.0143, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch 11
loss:  tensor(0.0314, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch 12
loss:  tensor(0.0282, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch 13
loss:  tensor

In [160]:
model.eval()
pred = model(data).detach().argmax(dim=1)
correct = (pred[data.test_mask]==data.y[data.test_mask]).sum()
accuracy = correct/data.test_mask.sum()
accuracy.item()

0.7960000038146973