In [1]:
import os
import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

2.3.0+cu121


In [2]:
import torch
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv
from torch_geometric.utils import train_test_split_edges

# Tutorial 6  
Graph AutoEncoders GAE &  
Variational Graph Autoencoders VGAE    

[paper](https://arxiv.org/pdf/1611.07308.pdf)  
[code](https://github.com/rusty1s/pytorch_geometric/blob/master/examples/autoencoder.py)

## Graph AutoEncoder GAE

### Load the data

In [3]:
# dataset = Planetoid("data/", "CiteSeer", transform=T.NormalizeFeatures())
# dataset.data

from torch_geometric.datasets import TUDataset

dataset = TUDataset(root='data/TUDataset', name='MUTAG')

In [4]:
data = dataset[0]
data.train_mask = data.val_mask = data.test_mask = None
data

Data(edge_index=[2, 38], x=[17, 7], edge_attr=[38, 4], y=[1])

In [5]:
data = train_test_split_edges(data)



In [6]:
data

Data(x=[17, 7], y=[1], val_pos_edge_index=[2, 0], val_pos_edge_attr=[0, 4], test_pos_edge_index=[2, 1], test_pos_edge_attr=[1, 4], train_pos_edge_index=[2, 36], train_pos_edge_attr=[36, 4], train_neg_adj_mask=[17, 17], val_neg_edge_index=[2, 0], test_neg_edge_index=[2, 1])

### Define the Encoder

In [7]:
class GCNEncoder(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(GCNEncoder, self).__init__()
        self.conv1 = GCNConv(in_channels, 2 * out_channels, cached=True) # cached only for transductive learning
        self.conv2 = GCNConv(2 * out_channels, out_channels, cached=True) # cached only for transductive learning

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        return self.conv2(x, edge_index)


### Define the Autoencoder

In [8]:
from torch_geometric.nn import GAE

In [9]:
# parameters
out_channels = 2
num_features = dataset.num_features
epochs = 100

# model
model = GAE(GCNEncoder(num_features, out_channels))

# move to GPU (if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
x = data.x.to(device)
train_pos_edge_index = data.train_pos_edge_index.to(device)

# inizialize the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [10]:
def train():
    model.train()
    optimizer.zero_grad()
    z = model.encode(x, train_pos_edge_index)
    loss = model.recon_loss(z, train_pos_edge_index)
    #if args.variational:
    #   loss = loss + (1 / data.num_nodes) * model.kl_loss()
    loss.backward()
    optimizer.step()
    return float(loss)


def test(pos_edge_index, neg_edge_index):
    model.eval()
    with torch.no_grad():
        z = model.encode(x, train_pos_edge_index)
    return model.test(z, pos_edge_index, neg_edge_index)



In [11]:
for epoch in range(1, epochs + 1):
    loss = train()

    auc, ap = test(data.test_pos_edge_index, data.test_neg_edge_index)
    print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))

Epoch: 001, AUC: 0.0000, AP: 0.5000
Epoch: 002, AUC: 0.0000, AP: 0.5000
Epoch: 003, AUC: 0.0000, AP: 0.5000
Epoch: 004, AUC: 0.0000, AP: 0.5000
Epoch: 005, AUC: 0.0000, AP: 0.5000
Epoch: 006, AUC: 0.0000, AP: 0.5000
Epoch: 007, AUC: 0.0000, AP: 0.5000
Epoch: 008, AUC: 0.0000, AP: 0.5000
Epoch: 009, AUC: 0.0000, AP: 0.5000
Epoch: 010, AUC: 0.0000, AP: 0.5000
Epoch: 011, AUC: 0.0000, AP: 0.5000
Epoch: 012, AUC: 0.0000, AP: 0.5000
Epoch: 013, AUC: 0.0000, AP: 0.5000
Epoch: 014, AUC: 0.0000, AP: 0.5000
Epoch: 015, AUC: 0.0000, AP: 0.5000
Epoch: 016, AUC: 0.0000, AP: 0.5000
Epoch: 017, AUC: 0.0000, AP: 0.5000
Epoch: 018, AUC: 0.0000, AP: 0.5000
Epoch: 019, AUC: 0.0000, AP: 0.5000
Epoch: 020, AUC: 0.0000, AP: 0.5000
Epoch: 021, AUC: 0.0000, AP: 0.5000
Epoch: 022, AUC: 0.0000, AP: 0.5000
Epoch: 023, AUC: 0.0000, AP: 0.5000
Epoch: 024, AUC: 0.0000, AP: 0.5000
Epoch: 025, AUC: 0.0000, AP: 0.5000
Epoch: 026, AUC: 0.0000, AP: 0.5000
Epoch: 027, AUC: 0.0000, AP: 0.5000
Epoch: 028, AUC: 0.0000, AP:

In [34]:
Z = model.encode(x, train_pos_edge_index)
Z

tensor([[ 0.4601, -0.6305],
        [ 0.4819, -0.6842],
        [ 0.3945, -0.5024],
        [ 0.4260, -0.5537],
        [ 0.4956, -0.7163],
        [ 0.4434, -0.5928],
        [ 0.4325, -0.5752],
        [ 0.4244, -0.5621],
        [ 0.4923, -0.7015],
        [ 0.4923, -0.7015],
        [ 0.4244, -0.5621],
        [ 0.2463, -0.3203],
        [-0.2387,  0.2639],
        [ 0.2397, -0.2988],
        [-1.6152,  2.0249],
        [-1.4582,  1.9508],
        [-1.4582,  1.9508]], grad_fn=<AddBackward0>)

## Are the results (AUC) and (AP) easy to read and compare?

# Use Tensorboard

In [13]:
from torch.utils.tensorboard import SummaryWriter

2024-05-11 17:13:44.438697: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-11 17:13:44.505487: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [14]:
# parameters
out_channels = 2
num_features = dataset.num_features
epochs = 100

# model
model = GAE(GCNEncoder(num_features, out_channels))

# move to GPU (if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
x = data.x.to(device)
train_pos_edge_index = data.train_pos_edge_index.to(device)

# inizialize the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

### Import tensorboard

#### Installation: (if needed) "pip install tensorboard"

In [15]:
writer = SummaryWriter('runs/GAE1_experiment_'+'2d_100_epochs')

In [16]:
for epoch in range(1, epochs + 1):
    loss = train()
    auc, ap = test(data.test_pos_edge_index, data.test_neg_edge_index)
    print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))


    writer.add_scalar('auc train',auc,epoch) # new line
    writer.add_scalar('ap train',ap,epoch)   # new line

Epoch: 001, AUC: 0.5662, AP: 0.6025
Epoch: 002, AUC: 0.6186, AP: 0.6618
Epoch: 003, AUC: 0.6319, AP: 0.6710
Epoch: 004, AUC: 0.6382, AP: 0.6772
Epoch: 005, AUC: 0.6431, AP: 0.6822
Epoch: 006, AUC: 0.6466, AP: 0.6855
Epoch: 007, AUC: 0.6486, AP: 0.6879
Epoch: 008, AUC: 0.6507, AP: 0.6901
Epoch: 009, AUC: 0.6528, AP: 0.6922
Epoch: 010, AUC: 0.6546, AP: 0.6944
Epoch: 011, AUC: 0.6568, AP: 0.6966
Epoch: 012, AUC: 0.6586, AP: 0.6988
Epoch: 013, AUC: 0.6609, AP: 0.7016
Epoch: 014, AUC: 0.6630, AP: 0.7048
Epoch: 015, AUC: 0.6651, AP: 0.7081
Epoch: 016, AUC: 0.6675, AP: 0.7115
Epoch: 017, AUC: 0.6694, AP: 0.7145
Epoch: 018, AUC: 0.6710, AP: 0.7175
Epoch: 019, AUC: 0.6723, AP: 0.7200
Epoch: 020, AUC: 0.6741, AP: 0.7227
Epoch: 021, AUC: 0.6760, AP: 0.7253
Epoch: 022, AUC: 0.6773, AP: 0.7275
Epoch: 023, AUC: 0.6793, AP: 0.7303
Epoch: 024, AUC: 0.6802, AP: 0.7323
Epoch: 025, AUC: 0.6816, AP: 0.7342
Epoch: 026, AUC: 0.6823, AP: 0.7351
Epoch: 027, AUC: 0.6830, AP: 0.7360
Epoch: 028, AUC: 0.6834, AP:

## Graph Variational AutoEncoder (GVAE)

In [17]:
from torch_geometric.nn import VGAE

In [18]:
# dataset = Planetoid("\..", "CiteSeer", transform=T.NormalizeFeatures())
data = dataset[0]
data.train_mask = data.val_mask = data.test_mask = data.y = None
data = train_test_split_edges(data)


class VariationalGCNEncoder(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(VariationalGCNEncoder, self).__init__()
        self.conv1 = GCNConv(in_channels, 2 * out_channels, cached=True) # cached only for transductive learning
        self.conv_mu = GCNConv(2 * out_channels, out_channels, cached=True)
        self.conv_logstd = GCNConv(2 * out_channels, out_channels, cached=True)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        return self.conv_mu(x, edge_index), self.conv_logstd(x, edge_index)



In [19]:
out_channels = 2
num_features = dataset.num_features
epochs = 300


model = VGAE(VariationalGCNEncoder(num_features, out_channels))  # new line

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
x = data.x.to(device)
train_pos_edge_index = data.train_pos_edge_index.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [20]:
def train():
    model.train()
    optimizer.zero_grad()
    z = model.encode(x, train_pos_edge_index)
    loss = model.recon_loss(z, train_pos_edge_index)

    loss = loss + (1 / data.num_nodes) * model.kl_loss()  # new line
    loss.backward()
    optimizer.step()
    return float(loss)


def test(pos_edge_index, neg_edge_index):
    model.eval()
    with torch.no_grad():
        z = model.encode(x, train_pos_edge_index)
    return model.test(z, pos_edge_index, neg_edge_index)

In [21]:
writer = SummaryWriter('runs/VGAE_experiment_'+'2d_100_epochs')

for epoch in range(1, epochs + 1):
    loss = train()
    auc, ap = test(data.test_pos_edge_index, data.test_neg_edge_index)
    print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))


    writer.add_scalar('auc train',auc,epoch) # new line
    writer.add_scalar('ap train',ap,epoch)   # new line

Epoch: 001, AUC: 0.6285, AP: 0.6641
Epoch: 002, AUC: 0.6372, AP: 0.6653
Epoch: 003, AUC: 0.6358, AP: 0.6594
Epoch: 004, AUC: 0.6337, AP: 0.6572
Epoch: 005, AUC: 0.6323, AP: 0.6556
Epoch: 006, AUC: 0.6313, AP: 0.6545
Epoch: 007, AUC: 0.6304, AP: 0.6534
Epoch: 008, AUC: 0.6298, AP: 0.6528
Epoch: 009, AUC: 0.6288, AP: 0.6521
Epoch: 010, AUC: 0.6285, AP: 0.6514
Epoch: 011, AUC: 0.6281, AP: 0.6507
Epoch: 012, AUC: 0.6274, AP: 0.6501
Epoch: 013, AUC: 0.6268, AP: 0.6495
Epoch: 014, AUC: 0.6262, AP: 0.6484
Epoch: 015, AUC: 0.6254, AP: 0.6475
Epoch: 016, AUC: 0.6248, AP: 0.6463
Epoch: 017, AUC: 0.6242, AP: 0.6453
Epoch: 018, AUC: 0.6231, AP: 0.6444
Epoch: 019, AUC: 0.6221, AP: 0.6431
Epoch: 020, AUC: 0.6216, AP: 0.6422
Epoch: 021, AUC: 0.6214, AP: 0.6417
Epoch: 022, AUC: 0.6218, AP: 0.6421
Epoch: 023, AUC: 0.6220, AP: 0.6422
Epoch: 024, AUC: 0.6220, AP: 0.6422
Epoch: 025, AUC: 0.6221, AP: 0.6425
Epoch: 026, AUC: 0.6230, AP: 0.6434
Epoch: 027, AUC: 0.6239, AP: 0.6444
Epoch: 028, AUC: 0.6245, AP: