# This notebook applies two random graph samplers, Spiky Ball and Common Neighbor Aware Random Walk, together with different aggregators

Spiky Ball Sampler (2020), Common Neighbor Aware Random Walk Sampler (2019)

## Install and import required packages

In [None]:
# Install required packages of PyTorch Geometric
!pip install -q torch-scatter==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.7.0.html
!pip install -q torch-sparse==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.7.0.html
!pip install -q git+https://github.com/rusty1s/pytorch_geometric.git

  Building wheel for torch-geometric (setup.py) ... done


In [None]:
# This step is very time-consuming; please be patient.
!pip install littleballoffur
!pip install littleballoffur --upgrade

Requirement already up-to-date: littleballoffur in /usr/local/lib/python3.6/dist-packages (2.1.2)


In [None]:
import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv
import numpy as np
from torch_geometric.utils.convert import to_networkx
from google_drive_downloader import GoogleDriveDownloader as gdd
from torch_geometric.datasets import Yelp, Flickr, Amazon

In [None]:
import networkx as nx
import matplotlib.pyplot as plt
from littleballoffur import SpikyBallSampler, CommonNeighborAwareRandomWalkSampler

## Load graph datasets 

In [None]:
# Load the Flickr dataset, using 'data/Flickr' as its root directory.
dataset = Flickr(root='data/Flickr')
# Take the first element of the dataset as the graph used in the rest of the notebook.
data = dataset[0] 
# Show a summary of the tensors stored on the graph object.
print(data)

Data(edge_index=[2, 899756], test_mask=[89250], train_mask=[89250], val_mask=[89250], x=[89250, 500], y=[89250])


## Define Samplers


In [None]:
def Geometric_SpikyBallSampler(data, G_data, batch_size):
    """Draw Spiky Ball subgraphs of ``G_data`` and wrap each one as a PyG ``Data``.

    Args:
        data: Full-graph ``Data`` object; its ``num_nodes``, ``x``, ``y``,
            ``train_mask``, ``val_mask`` and ``test_mask`` are read.
        G_data: Graph passed to ``SpikyBallSampler.sample``. The node ids of
            the sampled edges are used directly as row indices into the
            tensors of ``data``.
        batch_size: Only controls how many subgraphs are drawn
            (``data.num_nodes // batch_size``). The sampler itself is built
            with its default arguments.

    Returns:
        A list of ``Data`` objects, one per sampled subgraph, each carrying
        ``x``, ``y``, a relabelled ``edge_index`` and the three split masks.
    """
    sampler = SpikyBallSampler()
    sample_list = []
    for _ in range(data.num_nodes // batch_size):
        # Sample exactly once per batch: sampling is the expensive step and
        # every draw returns a different random subgraph.
        edges = list(sampler.sample(G_data).edges)

        sources = [edge[0] for edge in edges]
        targets = [edge[1] for edge in edges]

        # Only nodes that appear in at least one sampled edge are kept.
        # Sorting makes the local numbering deterministic for a given edge set.
        sampled_nodes = sorted(set(sources) | set(targets))
        local_id = {node: idx for idx, node in enumerate(sampled_nodes)}

        # Relabel edge endpoints to 0..len(sampled_nodes)-1. An explicit dtype
        # keeps edge_index integral even when no edge was sampled.
        # NOTE(review): every sampled edge is stored once, in the orientation
        # reported by the sampled graph; if symmetric message passing is
        # expected, confirm whether reverse edges must be added.
        sample_edge = torch.tensor(
            [[local_id[u] for u in sources], [local_id[v] for v in targets]],
            dtype=torch.long,
        )

        node_idx = torch.tensor(sampled_nodes, dtype=torch.long)
        sample_data = Data(x=data.x[node_idx], edge_index=sample_edge,
                           y=data.y[node_idx])

        # Carry the split masks over so training/evaluation can select nodes.
        sample_data.train_mask = data.train_mask[node_idx]
        sample_data.test_mask = data.test_mask[node_idx]
        sample_data.val_mask = data.val_mask[node_idx]

        sample_list.append(sample_data)

    return sample_list

In [None]:
def Geometric_CommonNeighborAwareRandomWalkSampler(data, G_data, n_nodes, batch_size):
    """Draw CNARW subgraphs of ``G_data`` and wrap each one as a PyG ``Data``.

    Args:
        data: Full-graph ``Data`` object; its ``num_nodes``, ``x``, ``y``,
            ``train_mask``, ``val_mask`` and ``test_mask`` are read.
        G_data: Graph passed to
            ``CommonNeighborAwareRandomWalkSampler.sample``. The node ids of
            the sampled edges are used directly as row indices into the
            tensors of ``data``.
        n_nodes: Forwarded as the sole argument to the
            ``CommonNeighborAwareRandomWalkSampler`` constructor.
        batch_size: Only controls how many subgraphs are drawn
            (``data.num_nodes // batch_size``).

    Returns:
        A list of ``Data`` objects, one per sampled subgraph, each carrying
        ``x``, ``y``, a relabelled ``edge_index`` and the three split masks.
    """
    sampler = CommonNeighborAwareRandomWalkSampler(n_nodes)
    sample_list = []
    for _ in range(data.num_nodes // batch_size):
        # Sample exactly once per batch: sampling is the expensive step and
        # every draw returns a different random subgraph.
        edges = list(sampler.sample(G_data).edges)

        sources = [edge[0] for edge in edges]
        targets = [edge[1] for edge in edges]

        # Only nodes that appear in at least one sampled edge are kept.
        # Sorting makes the local numbering deterministic for a given edge set.
        sampled_nodes = sorted(set(sources) | set(targets))
        local_id = {node: idx for idx, node in enumerate(sampled_nodes)}

        # Relabel edge endpoints to 0..len(sampled_nodes)-1. An explicit dtype
        # keeps edge_index integral even when no edge was sampled.
        # NOTE(review): every sampled edge is stored once, in the orientation
        # reported by the sampled graph; if symmetric message passing is
        # expected, confirm whether reverse edges must be added.
        sample_edge = torch.tensor(
            [[local_id[u] for u in sources], [local_id[v] for v in targets]],
            dtype=torch.long,
        )

        node_idx = torch.tensor(sampled_nodes, dtype=torch.long)
        sample_data = Data(x=data.x[node_idx], edge_index=sample_edge,
                           y=data.y[node_idx])

        # Carry the split masks over so training/evaluation can select nodes.
        sample_data.train_mask = data.train_mask[node_idx]
        sample_data.test_mask = data.test_mask[node_idx]
        sample_data.val_mask = data.val_mask[node_idx]

        sample_list.append(sample_data)

    return sample_list

In [None]:
# Build the networkx view of the graph that the samplers operate on.
G_data = to_networkx(data, to_undirected=True)

# NOTE: the second assignment replaces the first, so training uses the
# Common Neighbor Aware Random Walk batches. Comment out the sampler that is
# not wanted to avoid paying for both sampling passes.
sample_loader = Geometric_SpikyBallSampler(data, G_data, 6000)
sample_loader = Geometric_CommonNeighborAwareRandomWalkSampler(data, G_data, 5000, 6000)

# Keep the first sampled subgraph around for inspection. Index the list
# instead of looping with `for data in ...`, which would rebind the
# module-level `data` (the full graph) to a single subgraph.
sample = sample_loader[0] if sample_loader else None

## Define model and train

In [None]:
##########################################################################
# Our graph doesn't carry num_classes information, unfortunately,
# so it is set manually (R8: 8, ohsumed: 23, mr: 2, yelp: 23).
##########################################################################
class Net(torch.nn.Module):
    """Three stacked GCN layers whose outputs are concatenated and classified.

    Args:
        hidden_channels: Output width of each of the three ``GCNConv`` layers.
        in_channels: Size of the input node features. Defaults to 500, the
            value that was previously hard-coded.
        out_channels: Number of output classes. Defaults to 7, the value that
            was previously hard-coded.
    """

    def __init__(self, hidden_channels, in_channels=500, out_channels=7):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        # The classifier sees the concatenation of all three layer outputs.
        self.lin = torch.nn.Linear(3 * hidden_channels, out_channels)

    def set_aggr(self, aggr):
        """Set the ``aggr`` attribute of all three convolution layers."""
        self.conv1.aggr = aggr
        self.conv2.aggr = aggr
        self.conv3.aggr = aggr

    def forward(self, x0, edge_index, edge_weight=None):
        """Return per-node log-probabilities over the output classes.

        Args:
            x0: Input node features.
            edge_index: Graph connectivity passed to every ``GCNConv`` layer.
            edge_weight: Optional edge weights passed to every layer.
        """
        # Dropout (p=0.2) is only active while the module is in training mode.
        x1 = F.relu(self.conv1(x0, edge_index, edge_weight))
        x1 = F.dropout(x1, p=0.2, training=self.training)
        x2 = F.relu(self.conv2(x1, edge_index, edge_weight))
        x2 = F.dropout(x2, p=0.2, training=self.training)
        x3 = F.relu(self.conv3(x2, edge_index, edge_weight))
        x3 = F.dropout(x3, p=0.2, training=self.training)
        x = torch.cat([x1, x2, x3], dim=-1)
        x = self.lin(x)

        return x.log_softmax(dim=-1)

In [None]:
###########################################################################
# Our graph doesn't carry num_classes information, unfortunately,
# so it is set manually (R8: 8, ohsumed: 23, mr: 2, yelp: 23).
##########################################################################
# Use the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Build the network with 256 hidden channels and move it to the chosen device.
model = Net(hidden_channels=256)
model = model.to(device)

# Adam over all model parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

# Show the layer summary.
print(model)

Net(
  (conv1): GCNConv(500, 256)
  (conv2): GCNConv(256, 256)
  (conv3): GCNConv(256, 256)
  (lin): Linear(in_features=768, out_features=7, bias=True)
)


In [None]:
# train function
def train():
    """Run one optimisation pass over ``sample_loader``.

    Uses the module-level ``model``, ``optimizer``, ``device`` and
    ``sample_loader``. The loss of each batch is computed only on the nodes
    selected by that batch's ``train_mask``, so validation and test labels do
    not contribute to the gradients.

    Returns:
        The mean negative log-likelihood per training node over all batches,
        or ``nan`` when no batch contained a training node.
    """
    model.train()
    # Aggregator used by every GCN layer. 'add' can be replaced by 'max' or
    # 'mean'; remove this line to keep the layers' built-in aggregation.
    model.set_aggr('add')
    total_loss = total_examples = 0
    for batch in sample_loader:
        batch = batch.to(device)
        train_mask = batch.train_mask
        num_train = int(train_mask.sum())
        if num_train == 0:
            # Nothing to learn from; also avoids a NaN loss on an empty selection.
            continue
        optimizer.zero_grad()
        out = model(batch.x, batch.edge_index)
        loss = F.nll_loss(out[train_mask], batch.y[train_mask])
        loss.backward()
        optimizer.step()
        # Weight each batch by its number of training nodes so the returned
        # value is a per-node average.
        total_loss += loss.item() * num_train
        total_examples += num_train
    if total_examples == 0:
        return float('nan')
    return total_loss / total_examples

In [None]:
# test function
@torch.no_grad()
def test():
    """Evaluate the model on the module-level ``data`` graph.

    Uses the module-level ``model``, ``device`` and ``data`` (whatever
    ``data`` is bound to when the function is called).

    Returns:
        A list ``[train_acc, val_acc, test_acc]``: the fraction of correctly
        predicted nodes under ``train_mask``, ``val_mask`` and ``test_mask``.
        An entry is ``nan`` when its mask selects no node.
    """
    model.eval()
    # Aggregator used by every GCN layer. 'add' can be replaced by 'max' or
    # 'mean'; remove this line to keep the layers' built-in aggregation.
    model.set_aggr('add')

    out = model(data.x.to(device), data.edge_index.to(device))
    pred = out.argmax(dim=-1)
    correct = pred.eq(data.y.to(device))

    accs = []
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        num_selected = mask.sum().item()
        if num_selected == 0:
            # An empty split has no defined accuracy; avoid dividing by zero.
            accs.append(float('nan'))
            continue
        accs.append(correct[mask].sum().item() / num_selected)
    return accs

In [None]:
# print results
# Train for 50 epochs; after each one, evaluate and report the loss together
# with the train / validation / test accuracies.
for epoch in range(1, 51):
    # The tuple is evaluated left to right: train() runs before test().
    loss, accs = train(), test()
    print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Train: {accs[0]:.4f}, Val: {accs[1]:.4f}, Test: {accs[2]:.4f}')

Epoch: 01, Loss: 1.7607, Train: 0.4042, Val: 0.4323, Test: 0.4185
Epoch: 02, Loss: 1.7259, Train: 0.4178, Val: 0.4467, Test: 0.4177
Epoch: 03, Loss: 1.6684, Train: 0.4174, Val: 0.4361, Test: 0.4161
Epoch: 04, Loss: 1.6219, Train: 0.4191, Val: 0.4422, Test: 0.4177
Epoch: 05, Loss: 1.5954, Train: 0.4331, Val: 0.4528, Test: 0.4407
Epoch: 06, Loss: 1.5790, Train: 0.4430, Val: 0.4543, Test: 0.4446
Epoch: 07, Loss: 1.5638, Train: 0.4476, Val: 0.4589, Test: 0.4494
Epoch: 08, Loss: 1.5500, Train: 0.4517, Val: 0.4574, Test: 0.4509
Epoch: 09, Loss: 1.5396, Train: 0.4554, Val: 0.4627, Test: 0.4557
Epoch: 10, Loss: 1.5277, Train: 0.4608, Val: 0.4627, Test: 0.4589
Epoch: 11, Loss: 1.5158, Train: 0.4653, Val: 0.4650, Test: 0.4604
Epoch: 12, Loss: 1.5061, Train: 0.4682, Val: 0.4703, Test: 0.4628
Epoch: 13, Loss: 1.4957, Train: 0.4785, Val: 0.4772, Test: 0.4644
Epoch: 14, Loss: 1.4833, Train: 0.4860, Val: 0.4848, Test: 0.4731
Epoch: 15, Loss: 1.4724, Train: 0.4851, Val: 0.4909, Test: 0.4818
Epoch: 16,