In [1]:
import torch
import torch.nn
import torch.nn.functional as F

import numpy
import matplotlib.pyplot as plt

In [2]:
import torch

def format_pytorch_version(version):
  return version.split('+')[0]

TORCH_version = torch.__version__
TORCH = format_pytorch_version(TORCH_version)

def format_cuda_version(version):
  return 'cu' + version.replace('.', '')

CUDA_version = torch.version.cuda
CUDA = format_cuda_version(CUDA_version)

!pip install torch-scatter     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-sparse      -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-cluster     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-geometric 

Looking in links: https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-1.10.0%2Bcu113/torch_scatter-2.0.9-cp37-cp37m-linux_x86_64.whl (7.9 MB)
[K     |████████████████████████████████| 7.9 MB 5.5 MB/s 
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.0.9
Looking in links: https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-1.10.0%2Bcu113/torch_sparse-0.6.12-cp37-cp37m-linux_x86_64.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 5.1 MB/s 
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.12
Looking in links: https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html
Collecting torch-cluster
  Downloading https://data.pyg.org/whl/torch-1.10.0%2Bcu113/torch_cluster-1.5.9-cp37-cp37m-linux_x86_64.whl (2.3 MB)
[K     |████████████████████████████████| 2.3

In [3]:
import torch_geometric 
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, VGAE, GAE
from torch_geometric.utils import train_test_split_edges 

In [4]:
dataset = Planetoid("\..", "CiteSeer", transform=T.NormalizeFeatures())
dataset.data

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!


Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327])

In [5]:
data = dataset[0]
data.train_mask = data.val_mask = data.test_mask = None

In [6]:
type(dataset)

torch_geometric.datasets.planetoid.Planetoid

In [7]:
data = train_test_split_edges(data)



In [8]:
data
#edge index = adjacency matrix, test_neg_edges -> edges in test set that are not in the graph, test_pos_edge_index -> edges in test set that are in graph, x -> feature matrix, y -> labels

Data(x=[3327, 3703], y=[3327], val_pos_edge_index=[2, 227], test_pos_edge_index=[2, 455], train_pos_edge_index=[2, 7740], train_neg_adj_mask=[3327, 3327], val_neg_edge_index=[2, 227], test_neg_edge_index=[2, 455])

In [9]:
#Define encoder
#in_channels -> number of features 
#out_channels -> 2 * out_channels --> because we have 2 convolutional layers. We go from the input features to double of output features and then in 2nd layer we go from double of output features we go to out features (size of embedding we want to produce)


In [10]:
class GCNEncoder(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(GCNEncoder, self).__init__()
        self.conv1 = GCNConv(in_channels, 2 * out_channels, cached=True) # cached only for transductive learning
        self.conv2 = GCNConv(2 * out_channels, out_channels, cached=True) # cached only for transductive learning

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        return self.conv2(x, edge_index)


In [11]:
#Define the autoencoder

In [14]:
#Parameters
out_channels = 2 #embedding in 2 dimensions
num_features = dataset.num_features
epochs = 100

#Model
model = GAE(GCNEncoder(num_features, out_channels))

#Move to GPU, if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
x = data.x.to(device)
train_pos_edge_index = data.train_pos_edge_index.to(device)

#Initialize the optimizer
optimizer =torch.optim.Adam(model.parameters(), lr = 0.01)


In [15]:
model

GAE(
  (encoder): GCNEncoder(
    (conv1): GCNConv(3703, 4)
    (conv2): GCNConv(4, 2)
  )
  (decoder): InnerProductDecoder()
)

In [16]:
dir(model)

['T_destination',
 '__annotations__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_backward_hooks',
 '_buffers',
 '_call_impl',
 '_forward_hooks',
 '_forward_pre_hooks',
 '_get_backward_hooks',
 '_get_name',
 '_is_full_backward_hook',
 '_load_from_state_dict',
 '_load_state_dict_pre_hooks',
 '_maybe_warn_non_full_backward_hook',
 '_modules',
 '_named_members',
 '_non_persistent_buffers_set',
 '_parameters',
 '_register_load_state_dict_pre_hook',
 '_register_state_dict_hook',
 '_replicate_for_data_parallel',
 '_save_to_state_dict',
 '_slow_forward',
 '_state_dict_hooks',
 '_version',
 'add_module',
 'apply',


In [21]:
def train():
    model.train()
    optimizer.zero_grad()
    z = model.encode(x, train_pos_edge_index)
    loss = model.recon_loss(z, train_pos_edge_index)
    #if args.variational:
    #   loss = loss + (1 / data.num_nodes) * model.kl_loss()
    loss.backward()
    optimizer.step()
    return float(loss)


def test(pos_edge_index, neg_edge_index):
    model.eval()
    with torch.no_grad():
        z = model.encode(x, train_pos_edge_index)
    return model.test(z, pos_edge_index, neg_edge_index)

In [24]:
for epoch in range(1, epochs + 1):
    loss = train()

    auc, ap = test(data.test_pos_edge_index, data.test_neg_edge_index)
    print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))

Epoch: 001, AUC: 0.8129, AP: 0.8204
Epoch: 002, AUC: 0.8140, AP: 0.8217
Epoch: 003, AUC: 0.8154, AP: 0.8229
Epoch: 004, AUC: 0.8167, AP: 0.8243
Epoch: 005, AUC: 0.8181, AP: 0.8257
Epoch: 006, AUC: 0.8199, AP: 0.8275
Epoch: 007, AUC: 0.8212, AP: 0.8289
Epoch: 008, AUC: 0.8224, AP: 0.8302
Epoch: 009, AUC: 0.8238, AP: 0.8318
Epoch: 010, AUC: 0.8252, AP: 0.8337
Epoch: 011, AUC: 0.8267, AP: 0.8354
Epoch: 012, AUC: 0.8281, AP: 0.8370
Epoch: 013, AUC: 0.8295, AP: 0.8383
Epoch: 014, AUC: 0.8307, AP: 0.8395
Epoch: 015, AUC: 0.8317, AP: 0.8403
Epoch: 016, AUC: 0.8328, AP: 0.8412
Epoch: 017, AUC: 0.8340, AP: 0.8424
Epoch: 018, AUC: 0.8352, AP: 0.8434
Epoch: 019, AUC: 0.8362, AP: 0.8442
Epoch: 020, AUC: 0.8372, AP: 0.8449
Epoch: 021, AUC: 0.8382, AP: 0.8458
Epoch: 022, AUC: 0.8387, AP: 0.8461
Epoch: 023, AUC: 0.8396, AP: 0.8468
Epoch: 024, AUC: 0.8403, AP: 0.8473
Epoch: 025, AUC: 0.8410, AP: 0.8479
Epoch: 026, AUC: 0.8417, AP: 0.8484
Epoch: 027, AUC: 0.8424, AP: 0.8488
Epoch: 028, AUC: 0.8429, AP:

In [25]:
Z = model.encode(x, train_pos_edge_index)
Z

tensor([[-0.7016,  0.4660],
        [ 0.6916,  1.3677],
        [-0.4613, -0.6089],
        ...,
        [ 0.0550,  1.3295],
        [-0.8283,  0.1468],
        [-1.0683, -0.2868]], device='cuda:0', grad_fn=<AddBackward0>)