In [1]:
# Environment setup: install the PyTorch Geometric stack for the torch build already present.
import os
import torch
# Export the torch version so the shell commands below can read it as ${TORCH}.
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

# The wheel index URL is keyed on the torch version exported above.
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
# torch_geometric itself is installed from the GitHub repository (no version pin).
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

2.5.1+cu121
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.9/10.9 MB[0m [31m56.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.1/5.1 MB[0m [31m43.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for torch-geometric (pyproject.toml) ... [?25l[?25hdone


In [2]:
import torch
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv
from torch_geometric.utils import train_test_split_edges

# Tutorial 6  
Graph Autoencoders (GAE) &  
Variational Graph Autoencoders (VGAE)  

[paper](https://arxiv.org/pdf/1611.07308.pdf)  
[code](https://github.com/rusty1s/pytorch_geometric/blob/master/examples/autoencoder.py)

## Graph AutoEncoder GAE

### Load the data

In [3]:
# Load CiteSeer with row-normalised node features.
# Raw string: "\." is not a valid escape sequence and triggers a warning on
# recent Python versions; r"\.." keeps exactly the same root path.
dataset = Planetoid(r"\..", "CiteSeer", transform=T.NormalizeFeatures())
# Show the (single) graph through the public indexing API instead of the
# dataset's internal `.data` storage.
dataset[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!


Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327])

In [4]:
# Take the single graph of the dataset and clear its node-level split masks;
# the following cells work with edge splits instead.
data = dataset[0]
data.train_mask = None
data.val_mask = None
data.test_mask = None
data

Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327])

In [5]:
# Split the graph's edges into train / validation / test sets for link prediction.
# The returned object carries positive and negative edge sets per split
# (train_pos_edge_index, val_pos_edge_index, test_neg_edge_index, ...).
# NOTE(review): presumably a random split and no seed is set in this notebook,
# so the metrics below may differ between runs; confirm and seed if needed.
data = train_test_split_edges(data)



In [6]:
# Inspect the split result: per-split positive/negative edge index tensors.
data

Data(x=[3327, 3703], y=[3327], val_pos_edge_index=[2, 227], test_pos_edge_index=[2, 455], train_pos_edge_index=[2, 7740], train_neg_adj_mask=[3327, 3327], val_neg_edge_index=[2, 227], test_neg_edge_index=[2, 455])

### Define the Encoder

In [7]:
class GCNEncoder(torch.nn.Module):
    """Two-layer GCN encoder mapping node features to node embeddings.

    Args:
        in_channels: size of each input node feature vector.
        out_channels: size of each output embedding; the hidden layer
            uses twice this width.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        hidden_channels = 2 * out_channels
        # cached=True is only appropriate for transductive learning
        self.conv1 = GCNConv(in_channels, hidden_channels, cached=True)
        self.conv2 = GCNConv(hidden_channels, out_channels, cached=True)

    def forward(self, x, edge_index):
        """Return the embeddings produced by conv1 -> ReLU -> conv2."""
        hidden = torch.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)


### Define the Autoencoder

In [8]:
from torch_geometric.nn import GAE

In [9]:
# parameters
# Hyper-parameters
out_channels = 2
num_features = dataset.num_features
epochs = 100

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Graph autoencoder built around the GCN encoder, placed on the chosen device
model = GAE(GCNEncoder(num_features, out_channels)).to(device)

# Tensors used by every training / evaluation step, on the same device
x = data.x.to(device)
train_pos_edge_index = data.train_pos_edge_index.to(device)

# Initialize the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [11]:

# Debug aid: list every attribute and method name exposed by the model object
dir(model)


['T_destination',
 '__annotations__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_backward_hooks',
 '_backward_pre_hooks',
 '_buffers',
 '_call_impl',
 '_compiled_call_impl',
 '_forward_hooks',
 '_forward_hooks_always_called',
 '_forward_hooks_with_kwargs',
 '_forward_pre_hooks',
 '_forward_pre_hooks_with_kwargs',
 '_get_backward_hooks',
 '_get_backward_pre_hooks',
 '_get_name',
 '_is_full_backward_hook',
 '_load_from_state_dict',
 '_load_state_dict_post_hooks',
 '_load_state_dict_pre_hooks',
 '_maybe_warn_non_full_backward_hook',
 '_modules',
 '_named_members',
 '_non_persistent_buffers_se

In [12]:
def train():
    """Run one optimisation step on the training edges.

    Works on the notebook-level `model`, `optimizer`, `x` and
    `train_pos_edge_index`.

    Returns:
        The reconstruction loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()
    embeddings = model.encode(x, train_pos_edge_index)
    recon_loss = model.recon_loss(embeddings, train_pos_edge_index)
    recon_loss.backward()
    optimizer.step()
    return float(recon_loss)


def test(pos_edge_index, neg_edge_index):
    """Score the given positive / negative edges with the current model.

    Node embeddings are computed from the training edges without gradient
    tracking, then handed to `model.test`.

    Returns:
        Whatever `model.test` returns; callers unpack it as `(auc, ap)`.
    """
    model.eval()
    with torch.no_grad():
        embeddings = model.encode(x, train_pos_edge_index)
    metrics = model.test(embeddings, pos_edge_index, neg_edge_index)
    return metrics



In [13]:
# One optimisation step per epoch, then report AUC / AP on the test edges.
for epoch in range(1, epochs + 1):
    loss = train()

    auc, ap = test(data.test_pos_edge_index, data.test_neg_edge_index)
    print(f'Epoch: {epoch:03d}, AUC: {auc:.4f}, AP: {ap:.4f}')

Epoch: 001, AUC: 0.6281, AP: 0.6731
Epoch: 002, AUC: 0.6420, AP: 0.6830
Epoch: 003, AUC: 0.6462, AP: 0.6873
Epoch: 004, AUC: 0.6475, AP: 0.6894
Epoch: 005, AUC: 0.6487, AP: 0.6908
Epoch: 006, AUC: 0.6498, AP: 0.6924
Epoch: 007, AUC: 0.6509, AP: 0.6942
Epoch: 008, AUC: 0.6514, AP: 0.6954
Epoch: 009, AUC: 0.6519, AP: 0.6970
Epoch: 010, AUC: 0.6526, AP: 0.6991
Epoch: 011, AUC: 0.6525, AP: 0.7013
Epoch: 012, AUC: 0.6523, AP: 0.7039
Epoch: 013, AUC: 0.6514, AP: 0.7060
Epoch: 014, AUC: 0.6507, AP: 0.7081
Epoch: 015, AUC: 0.6500, AP: 0.7098
Epoch: 016, AUC: 0.6485, AP: 0.7105
Epoch: 017, AUC: 0.6473, AP: 0.7111
Epoch: 018, AUC: 0.6461, AP: 0.7112
Epoch: 019, AUC: 0.6459, AP: 0.7119
Epoch: 020, AUC: 0.6457, AP: 0.7121
Epoch: 021, AUC: 0.6454, AP: 0.7124
Epoch: 022, AUC: 0.6455, AP: 0.7127
Epoch: 023, AUC: 0.6456, AP: 0.7130
Epoch: 024, AUC: 0.6455, AP: 0.7132
Epoch: 025, AUC: 0.6460, AP: 0.7139
Epoch: 026, AUC: 0.6466, AP: 0.7148
Epoch: 027, AUC: 0.6472, AP: 0.7157
Epoch: 028, AUC: 0.6489, AP:

In [None]:
# Inspect the learned node embeddings (one row per node, `out_channels` columns).
# This is display-only, so run the forward pass without building an autograd graph.
with torch.no_grad():
    Z = model.encode(x, train_pos_edge_index)
Z

tensor([[-0.3415,  0.3505],
        [ 0.8631, -1.1042],
        [-0.7020,  0.8189],
        ...,
        [ 0.0874, -0.0409],
        [-0.6144,  0.7314],
        [-0.6832,  0.7997]], device='cuda:0', grad_fn=<AddBackward0>)

## Are the results (AUC) and (AP) easy to read and compare?

# Use Tensorboard

In [14]:
from torch.utils.tensorboard import SummaryWriter

In [15]:
# parameters
# Hyper-parameters
out_channels = 2
num_features = dataset.num_features
epochs = 100

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Re-create the model so the logged run starts from freshly initialised weights
model = GAE(GCNEncoder(num_features, out_channels)).to(device)

# Tensors used by every training / evaluation step, on the same device
x = data.x.to(device)
train_pos_edge_index = data.train_pos_edge_index.to(device)

# Initialize the optimizer for the new model's parameters
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

### Import tensorboard

#### Installation: (if needed) "pip install tensorboard"

In [16]:
# TensorBoard writer for this run; events are stored under the directory below.
writer = SummaryWriter('runs/GAE1_experiment_2d_100_epochs')

In [17]:
# Train the model and log the per-epoch metrics to TensorBoard.
for epoch in range(1, epochs + 1):
    loss = train()
    auc, ap = test(data.test_pos_edge_index, data.test_neg_edge_index)
    print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))

    # NOTE: despite the 'train' suffix in the tags, these values are computed
    # on the test edges passed to test() above.
    writer.add_scalar('auc train', auc, epoch)
    writer.add_scalar('ap train', ap, epoch)

# Push buffered events to disk so the complete run is visible in TensorBoard.
writer.flush()

Epoch: 001, AUC: 0.6301, AP: 0.6722
Epoch: 002, AUC: 0.6491, AP: 0.6865
Epoch: 003, AUC: 0.6535, AP: 0.6906
Epoch: 004, AUC: 0.6552, AP: 0.6921
Epoch: 005, AUC: 0.6560, AP: 0.6935
Epoch: 006, AUC: 0.6564, AP: 0.6944
Epoch: 007, AUC: 0.6570, AP: 0.6955
Epoch: 008, AUC: 0.6573, AP: 0.6964
Epoch: 009, AUC: 0.6575, AP: 0.6978
Epoch: 010, AUC: 0.6579, AP: 0.6994
Epoch: 011, AUC: 0.6587, AP: 0.7018
Epoch: 012, AUC: 0.6589, AP: 0.7041
Epoch: 013, AUC: 0.6592, AP: 0.7070
Epoch: 014, AUC: 0.6593, AP: 0.7095
Epoch: 015, AUC: 0.6589, AP: 0.7116
Epoch: 016, AUC: 0.6584, AP: 0.7136
Epoch: 017, AUC: 0.6577, AP: 0.7152
Epoch: 018, AUC: 0.6565, AP: 0.7160
Epoch: 019, AUC: 0.6560, AP: 0.7170
Epoch: 020, AUC: 0.6553, AP: 0.7171
Epoch: 021, AUC: 0.6543, AP: 0.7170
Epoch: 022, AUC: 0.6537, AP: 0.7170
Epoch: 023, AUC: 0.6534, AP: 0.7170
Epoch: 024, AUC: 0.6529, AP: 0.7170
Epoch: 025, AUC: 0.6532, AP: 0.7177
Epoch: 026, AUC: 0.6532, AP: 0.7183
Epoch: 027, AUC: 0.6532, AP: 0.7189
Epoch: 028, AUC: 0.6534, AP:

In [18]:
# Colab-only: mount Google Drive at /content/drive.
# NOTE(review): nothing in the visible part of the notebook reads or writes
# under this path; presumably intended for persisting results. Confirm or drop.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Variational Graph AutoEncoder (VGAE)

In [None]:
from torch_geometric.nn import VGAE

In [None]:
# Reload CiteSeer and rebuild the edge split for the variational experiment.
# Raw string: "\." is not a valid escape sequence and triggers a warning on
# recent Python versions; r"\.." keeps exactly the same root path.
dataset = Planetoid(r"\..", "CiteSeer", transform=T.NormalizeFeatures())
data = dataset[0]
# Clear the node-level masks and the labels before splitting the edges.
data.train_mask = data.val_mask = data.test_mask = data.y = None
data = train_test_split_edges(data)


class VariationalGCNEncoder(torch.nn.Module):
    """GCN encoder with a shared first layer and two output heads.

    Args:
        in_channels: size of each input node feature vector.
        out_channels: size of each head's output; the shared hidden layer
            uses twice this width.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        hidden_channels = 2 * out_channels
        # cached=True is only appropriate for transductive learning
        self.conv1 = GCNConv(in_channels, hidden_channels, cached=True)
        self.conv_mu = GCNConv(hidden_channels, out_channels, cached=True)
        self.conv_logstd = GCNConv(hidden_channels, out_channels, cached=True)

    def forward(self, x, edge_index):
        """Return the `(conv_mu, conv_logstd)` outputs for the given graph."""
        hidden = torch.relu(self.conv1(x, edge_index))
        mu = self.conv_mu(hidden, edge_index)
        logstd = self.conv_logstd(hidden, edge_index)
        return mu, logstd



In [None]:
# Hyper-parameters
out_channels = 2
num_features = dataset.num_features
epochs = 300

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Variational graph autoencoder built around the variational GCN encoder
model = VGAE(VariationalGCNEncoder(num_features, out_channels)).to(device)

# Tensors used by every training / evaluation step, on the same device
x = data.x.to(device)
train_pos_edge_index = data.train_pos_edge_index.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [None]:
def train():
    """Run one optimisation step of the variational model.

    The objective is the reconstruction loss on the training edges plus the
    model's KL term scaled by `1 / data.num_nodes`.

    Returns:
        The total loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()
    embeddings = model.encode(x, train_pos_edge_index)
    recon_term = model.recon_loss(embeddings, train_pos_edge_index)
    kl_term = (1 / data.num_nodes) * model.kl_loss()
    total_loss = recon_term + kl_term
    total_loss.backward()
    optimizer.step()
    return float(total_loss)


def test(pos_edge_index, neg_edge_index):
    """Score the given positive / negative edges with the current model.

    Node embeddings are computed from the training edges without gradient
    tracking, then handed to `model.test`.

    Returns:
        Whatever `model.test` returns; callers unpack it as `(auc, ap)`.
    """
    model.eval()
    with torch.no_grad():
        embeddings = model.encode(x, train_pos_edge_index)
    metrics = model.test(embeddings, pos_edge_index, neg_edge_index)
    return metrics

In [None]:
# Name the run after the configuration actually used: the previous hard-coded
# "100_epochs" label did not match `epochs` for this experiment.
writer = SummaryWriter(f'runs/VGAE_experiment_{out_channels}d_{epochs}_epochs')

# Train the model and log the per-epoch metrics to TensorBoard.
for epoch in range(1, epochs + 1):
    loss = train()
    auc, ap = test(data.test_pos_edge_index, data.test_neg_edge_index)
    print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))

    # NOTE: despite the 'train' suffix in the tags, these values are computed
    # on the test edges passed to test() above.
    writer.add_scalar('auc train', auc, epoch)
    writer.add_scalar('ap train', ap, epoch)

# Push buffered events to disk so the complete run is visible in TensorBoard.
writer.flush()

Epoch: 001, AUC: 0.6043, AP: 0.6264
Epoch: 002, AUC: 0.4937, AP: 0.5243
Epoch: 003, AUC: 0.4943, AP: 0.5001
Epoch: 004, AUC: 0.5013, AP: 0.5079
Epoch: 005, AUC: 0.5011, AP: 0.5006
Epoch: 006, AUC: 0.5022, AP: 0.5011
Epoch: 007, AUC: 0.5022, AP: 0.5011
Epoch: 008, AUC: 0.5022, AP: 0.5011
Epoch: 009, AUC: 0.4978, AP: 0.5007
Epoch: 010, AUC: 0.4978, AP: 0.5007
Epoch: 011, AUC: 0.4978, AP: 0.5007
Epoch: 012, AUC: 0.5011, AP: 0.5011
Epoch: 013, AUC: 0.5011, AP: 0.5011
Epoch: 014, AUC: 0.5011, AP: 0.5011
Epoch: 015, AUC: 0.5011, AP: 0.5011
Epoch: 016, AUC: 0.5011, AP: 0.5011
Epoch: 017, AUC: 0.5011, AP: 0.5011
Epoch: 018, AUC: 0.5011, AP: 0.5011
Epoch: 019, AUC: 0.5011, AP: 0.5011
Epoch: 020, AUC: 0.5011, AP: 0.5011
Epoch: 021, AUC: 0.5011, AP: 0.5011
Epoch: 022, AUC: 0.4989, AP: 0.4995
Epoch: 023, AUC: 0.4989, AP: 0.4995
Epoch: 024, AUC: 0.4989, AP: 0.4995
Epoch: 025, AUC: 0.4989, AP: 0.4995
Epoch: 026, AUC: 0.4989, AP: 0.4995
Epoch: 027, AUC: 0.4989, AP: 0.4995
Epoch: 028, AUC: 0.4989, AP:

Epoch: 231, AUC: 0.4989, AP: 0.4995
Epoch: 232, AUC: 0.4989, AP: 0.4995
Epoch: 233, AUC: 0.4989, AP: 0.4995
Epoch: 234, AUC: 0.4989, AP: 0.4995
Epoch: 235, AUC: 0.4989, AP: 0.4995
Epoch: 236, AUC: 0.4989, AP: 0.4995
Epoch: 237, AUC: 0.4989, AP: 0.4995
Epoch: 238, AUC: 0.4989, AP: 0.4995
Epoch: 239, AUC: 0.4989, AP: 0.4995
Epoch: 240, AUC: 0.4989, AP: 0.4995
Epoch: 241, AUC: 0.5011, AP: 0.5011
Epoch: 242, AUC: 0.5011, AP: 0.5011
Epoch: 243, AUC: 0.5011, AP: 0.5011
Epoch: 244, AUC: 0.5011, AP: 0.5011
Epoch: 245, AUC: 0.5011, AP: 0.5011
Epoch: 246, AUC: 0.5011, AP: 0.5011
Epoch: 247, AUC: 0.5011, AP: 0.5011
Epoch: 248, AUC: 0.5011, AP: 0.5011
Epoch: 249, AUC: 0.5011, AP: 0.5011
Epoch: 250, AUC: 0.5011, AP: 0.5011
Epoch: 251, AUC: 0.5011, AP: 0.5011
Epoch: 252, AUC: 0.5011, AP: 0.5011
Epoch: 253, AUC: 0.5011, AP: 0.5011
Epoch: 254, AUC: 0.5011, AP: 0.5011
Epoch: 255, AUC: 0.5011, AP: 0.5011
Epoch: 256, AUC: 0.5011, AP: 0.5011
Epoch: 257, AUC: 0.5011, AP: 0.5011
Epoch: 258, AUC: 0.5011, AP: