In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

### Data Handling of Graphs

In [1]:
import torch
from torch_geometric.data import Data
# create four edges
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
# assign the values for each node
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

data = Data(x=x, edge_index=edge_index)
# notice for edges we have a matrix
# for node values: we have a single column
print(data)

Data(edge_index=[2, 4], x=[3, 1])


In [5]:
''' Write the edge in a more intuitive way: '''
edge_index = torch.tensor([[0, 1],
                           [1, 0],
                           [1, 2],
                           [2, 1]], dtype=torch.long)
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

data = Data(x=x, edge_index=edge_index.t().contiguous())
print(data)

Data(edge_index=[2, 4], x=[3, 1])


In [6]:
print(data.keys)

['x', 'edge_index']


In [7]:
print(data['x'])

tensor([[-1.],
        [ 0.],
        [ 1.]])


In [5]:
# used like a dictionary:
for key, item in data:
    print("{} found in data".format(key))

edge_index found in data
x found in data


In [6]:
print(data.num_nodes, data.num_edges, data.num_node_features)

3 4 1


In [7]:
device = torch.device('cuda')
data = data.to(device)

### Common Benchmark Datasets

In [7]:
from torch_geometric.datasets import TUDataset

dataset = TUDataset(root='~/tmp/ENZYMES', name='ENZYMES')
print(len(dataset))
print(dataset.num_classes)
print(dataset.num_node_features)

600
6
3


In [8]:
data = dataset[0]
print(data.is_undirected())
print(data)
train_dataset = dataset[:540]
test_dataset = dataset[540:]
dataset = dataset.shuffle()
print(len(data))

True
Data(edge_index=[2, 168], x=[37, 3], y=[1])
3


####  Let’s download Cora, the standard benchmark dataset for semi-supervised graph node classification:

In [4]:
from torch_geometric.datasets import Planetoid
dataset = Planetoid(root='~/tmp/Cora', name='Cora')

print(len(dataset), dataset.num_classes, dataset.num_node_features)

1 7 1433


In [6]:
# the dataset contains only a single, undirected citation graph:
data = dataset[0]
print(data.is_undirected())
# the sum still returns a tensor type of data
# torch also defines its own data type: torch.uint8
print(type(data.train_mask), '\n', data.train_mask)
print(data.train_mask.sum())
print(data.train_mask.sum().item(), data.val_mask.sum().item(), data.test_mask.sum().item())
print(len(data))

True
<class 'torch.Tensor'> 
 tensor([1, 1, 1,  ..., 0, 0, 0], dtype=torch.uint8)
tensor(140)
140 500 1000
6


#### Mini-batches

In [12]:
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader

dataset = TUDataset(root='~/tmp/ENZYMES', name='ENZYMES', use_node_attr=True)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

# for batch in loader:
# #     print(batch)
#     print(batch.num_graphs)

In [13]:
'''
batch is a column vector which maps each node to its respective graph in the batch:
You can use it to, e.g., average node features in the node dimension for each graph individually:
'''
from torch_scatter import scatter_mean
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader

dataset = TUDataset(root='~/tmp/ENZYMES', name='ENZYMES', use_node_attr=True)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

for data in loader:

    x = scatter_mean(data.x, data.batch, dim=0)
    print(x.size())


torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([32, 21])
torch.Size([24, 21])


### Data Transforms

In [14]:
'''
We can convert the point cloud dataset into a graph dataset by generating nearest 
neighbor graphs from the point clouds via transforms:
'''
import torch_geometric.transforms as T
from torch_geometric.datasets import ShapeNet

dataset = ShapeNet(root='~/tmp/ShapeNet', categories=['Airplane'],
                    pre_transform=T.KNNGraph(k=6))

print(dataset[0])

Data(category=[1], edge_index=[2, 15108], pos=[2518, 3], y=[2518])


In [15]:
'''
In addition, we can use the transform argument to randomly augment a Data object, 
e.g. translating each node position by a small number:
'''
import torch_geometric.transforms as T
from torch_geometric.datasets import ShapeNet

dataset = ShapeNet(root='~/tmp/ShapeNet', categories=['Airplane'],
                    pre_transform=T.KNNGraph(k=6),
                    transform=T.RandomTranslate(0.01))

print(dataset[0])

Data(category=[1], edge_index=[2, 15108], pos=[2518, 3], y=[2518])


### Check the size of each dataset

In [45]:
# from torch_geometric.datasets import KarateClub, TUDataset, Planetoid, CoraFull
import torch_geometric.datasets as db
import importlib

import inspect
from pprint import pprint

data_list = inspect.getmembers(db, inspect.isclass)
# pprint(data_list)
dataset_names = [ele[0] for ele in data_list]
print(dataset_names)



['Amazon', 'BitcoinOTC', 'CoMA', 'Coauthor', 'CoraFull', 'DBP15K', 'DynamicFAUST', 'Entities', 'FAUST', 'GDELT', 'GEDDataset', 'GeometricShapes', 'ICEWS18', 'KarateClub', 'MNISTSuperpixels', 'ModelNet', 'PCPNetDataset', 'PPI', 'PascalVOCKeypoints', 'Planetoid', 'QM7b', 'QM9', 'Reddit', 'S3DIS', 'SHREC2016', 'ShapeNet', 'TOSCA', 'TUDataset', 'WILLOWObjectClass']


In [63]:
from torch_geometric.datasets import Amazon
dataset = Amazon(root='~/tmp/Amazon/Computers', name='Computers')
print(len(dataset), dataset.num_classes, dataset.num_node_features)
data = dataset[0]
print(data)

dataset = Amazon(root='~/tmp/Amazon/Photo', name='Photo')
print(len(dataset))
data = dataset[0]
print(data)

1 10 767
Data(edge_index=[2, 491722], x=[13752, 767], y=[13752])
1
Data(edge_index=[2, 238162], x=[7650, 745], y=[7650])


In [64]:
from torch_geometric.datasets import Planetoid
# sub_data_list = inspect.getmembers(Planetoid, inspect.isfunction)
# pprint(sub_data_list)
dataset = Planetoid(root='~/tmp/Planetoid/Cora', name='Cora')
print(len(dataset), dataset.num_classes, dataset.num_node_features)
data = dataset[0]
print(data)

dataset = Planetoid(root='~/tmp/Planetoid/CiteSeer', name='CiteSeer')
print(len(dataset), dataset.num_classes, dataset.num_node_features)
data = dataset[0]
print(data)

dataset = Planetoid(root='~/tmp/Planetoid/PubMed', name='PubMed')
print(len(dataset), dataset.num_classes, dataset.num_node_features)
data = dataset[0]
print(data)

1 7 1433
Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])
1 6 3703
Data(edge_index=[2, 9104], test_mask=[3327], train_mask=[3327], val_mask=[3327], x=[3327, 3703], y=[3327])
1 3 500
Data(edge_index=[2, 88648], test_mask=[19717], train_mask=[19717], val_mask=[19717], x=[19717, 500], y=[19717])


In [65]:
from torch_geometric.datasets import PPI
dataset = PPI(root='~/tmp/PPI')
print(len(dataset), dataset.num_classes, dataset.num_node_features)

for data in dataset:
    print(data)


20 121 50
Data(edge_index=[2, 32318], x=[1767, 50], y=[1767, 121])
Data(edge_index=[2, 29704], x=[1377, 50], y=[1377, 121])
Data(edge_index=[2, 59644], x=[2263, 50], y=[2263, 121])
Data(edge_index=[2, 65430], x=[2339, 50], y=[2339, 121])
Data(edge_index=[2, 36162], x=[1578, 50], y=[1578, 121])
Data(edge_index=[2, 18216], x=[1021, 50], y=[1021, 121])
Data(edge_index=[2, 44330], x=[1823, 50], y=[1823, 121])
Data(edge_index=[2, 70390], x=[2488, 50], y=[2488, 121])
Data(edge_index=[2, 7708], x=[591, 50], y=[591, 121])
Data(edge_index=[2, 106198], x=[3312, 50], y=[3312, 121])
Data(edge_index=[2, 64218], x=[2401, 50], y=[2401, 121])
Data(edge_index=[2, 46268], x=[1878, 50], y=[1878, 121])
Data(edge_index=[2, 45768], x=[1819, 50], y=[1819, 121])
Data(edge_index=[2, 106754], x=[3480, 50], y=[3480, 121])
Data(edge_index=[2, 85318], x=[2794, 50], y=[2794, 121])
Data(edge_index=[2, 59862], x=[2326, 50], y=[2326, 121])
Data(edge_index=[2, 77064], x=[2650, 50], y=[2650, 121])
Data(edge_index=[2, 85

In [66]:
from torch_geometric.datasets import Reddit
dataset = Reddit(root='~/tmp/Reddit')
print(len(dataset), dataset.num_classes, dataset.num_node_features)
data = dataset[0]
print(data)

1 41 602
Data(edge_index=[2, 114615892], test_mask=[232965], train_mask=[232965], val_mask=[232965], x=[232965, 602], y=[232965])


## Learning Methods on Graphs

In [2]:

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_node_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        # here we introduce the non-linearity
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [3]:
'''Allow additional attributes of the instance to be set by defining: __setitem__ function '''
from torch_geometric.datasets import Planetoid
dataset = Planetoid(root='~/tmp/Planetoid/PubMed', name='PubMed')
print(len(dataset), dataset.num_classes, dataset.num_node_features)
data = dataset[0]
print('info (attributes) of a single data instance')
print(data, '\n', data.num_nodes, data.num_edges, data.num_node_features, data.num_edge_features, data.keys, data.num_faces)
print(data.train_mask.shape)   # 1-D attributes

1 3 500
info (attributes) of a single data instance
Data(edge_index=[2, 88648], test_mask=[19717], train_mask=[19717], val_mask=[19717], x=[19717, 500], y=[19717]) 
 19717 88648 500 0 ['x', 'edge_index', 'y', 'train_mask', 'val_mask', 'test_mask'] None
torch.Size([19717])


In [4]:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# start to dig into the class inner functions
conv1 = GCNConv(dataset.num_node_features, 16).cuda()
conv2 = GCNConv(16, dataset.num_classes).cuda()

# detail inside the forward of the model
x, edge_index = data.x.cuda(), data.edge_index.cuda()
# each row inside the x is the feature vector of a single node inside the graph
print('node value: ', x.shape, type(x))
print('edge index value: ', edge_index.shape, type(edge_index))
x = conv1(x, edge_index)
print('after the first convolution layer, node value: ', x.shape, type(x))
x = F.relu(x)
print('after the relu layer, node value: ', x.shape, type(x))
x = F.dropout(x, training = True)
print('after the dropout, node value: ', x.shape, type(x))
x = conv2(x, edge_index)
print('after the second convolution layer, node value: ', x.shape, type(x))


node value:  torch.Size([19717, 500]) <class 'torch.Tensor'>
edge index value:  torch.Size([2, 88648]) <class 'torch.Tensor'>
after the first convolution layer, node value:  torch.Size([19717, 16]) <class 'torch.Tensor'>
after the relu layer, node value:  torch.Size([19717, 16]) <class 'torch.Tensor'>
after the dropout, node value:  torch.Size([19717, 16]) <class 'torch.Tensor'>
after the second convolution layer, node value:  torch.Size([19717, 3]) <class 'torch.Tensor'>


In [5]:
print(edge_index.shape)

torch.Size([2, 88648])


In [6]:

model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

    
model.eval()
_, pred = model(data).max(dim=1)
correct = float (pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
acc = correct / data.test_mask.sum().item()









In [7]:
print('Accuracy: {:.4f}'.format(acc))

Accuracy: 0.7930


In [None]:
print('Accuracy: {:.4f}'.format(acc))

In [76]:
torch.cuda.empty_cache

<function torch.cuda.empty_cache()>

In [6]:
!nvidia-smi

Fri Sep 13 17:50:13 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.87.00    Driver Version: 418.87.00    CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 166...  On   | 00000000:01:00.0  On |                  N/A |
| 43%   36C    P2    23W / 120W |   5745MiB /  5909MiB |      3%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [1]:
!(nvidia-smi | grep 'python' | awk '{ print $3 }' | xargs -n1 kill -9)

In [2]:
!nvidia-smi

Sat Sep 14 09:53:30 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.87.00    Driver Version: 418.87.00    CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 166...  On   | 00000000:01:00.0  On |                  N/A |
| 43%   33C    P8     7W / 120W |    240MiB /  5909MiB |      1%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [1]:
import torch
cuda = torch.device('cuda')
x = torch.randn((1, 1), requires_grad=True)
print(x.device)
with torch.autograd.profiler.profile(use_cuda=True) as prof:
        y = x ** 2
        y.backward()
print(prof)

cpu
-----------------------------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  -----------------------------------  
Name                                 Self CPU total %  Self CPU total   CPU total %      CPU total        CPU time avg     CUDA total %     CUDA total       CUDA time avg    Number of Calls  Input Shapes                         
-----------------------------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  -----------------------------------  
pow                                  29.92%           5.176ms          29.92%           5.176ms          5.176ms          30.32%           5.232ms          5.232ms          1                []                                   
torch::autograd::GraphRoot           0.04%            6.396us          0.04%       

In [5]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.current_device())
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))

True
0
1
GeForce GTX 1660 Ti
