In [None]:
# Node2Vec Model Using torch geometric with Cora
import os
import sys

import matplotlib.pyplot as plt
import torch
from sklearn.manifold import TSNE
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import Node2Vec

# Show whether PyTorch can see a CUDA device (displayed as this cell's output).
torch.cuda.is_available()

In [None]:
# DOWNLOAD DATA
# If the machine sits behind an HTTP(S) proxy, uncomment the two lines below
# (and adjust the address) before running this cell.
#os.environ['http_proxy'] = "http://proxy-sifi.rd.corpintra.net:3128"
#os.environ['https_proxy'] = "http://proxy-sifi.rd.corpintra.net:3128"
path = os.getcwd()  # root directory handed to Planetoid: the current working directory
dataset = Planetoid(root=path, name="Cora")

In [None]:
# IMPORT DATA
# dataset[0] is the graph object used by every later cell; print its summary.
data = dataset[0]
print('Cora: ', data)  # label fixed: the dataset is "Cora", not "Coda"

In [None]:
# CONSTRUCT THE MODEL
# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

Node2Vec_model = Node2Vec(
    data.edge_index,
    embedding_dim=128,
    walk_length=20,
    context_size=10,
    walks_per_node=10,
    p=1,
    q=1,
    num_negative_samples=1,
    num_nodes=None,
    sparse=True,  # sparse embedding gradients; this is why SparseAdam is used below
).to(device)

# Multi-process data loading is only enabled on Linux. On other platforms
# (notably Windows/macOS inside a notebook) worker processes are prone to
# hanging or failing to start, so sampling runs in the main process there.
num_workers = 4 if sys.platform == 'linux' else 0
loader = Node2Vec_model.loader(batch_size=128, shuffle=True, num_workers=num_workers)  # for batch training
optimizer = torch.optim.SparseAdam(
    list(Node2Vec_model.parameters()),  # list of parameters
    lr=0.01,  # learning rate
)

In [None]:
# PLOT 2D OF EMBEDDED REPRESENTATION (BEFORE TRAINING!)
@torch.no_grad() # deactivate autograd functionality
def plot_point(colors):
    """Scatter-plot a 2D t-SNE projection of the current node embeddings.

    The embeddings of all ``data.num_nodes`` nodes are read from
    ``Node2Vec_model``, reduced to two components with t-SNE, and drawn with
    one colour per class label in ``data.y``.

    Args:
        colors: Sequence of matplotlib colour specs indexed by class id; it
            must hold at least ``dataset.num_classes`` entries.

    Side effects:
        Puts ``Node2Vec_model`` into eval mode and shows one matplotlib figure.
    """
    Node2Vec_model.eval()  # switch the model to evaluation mode
    z = Node2Vec_model(torch.arange(data.num_nodes, device=device))  # embedding of every node
    z = TSNE(n_components=2).fit_transform(z.cpu().numpy())
    y = data.y.cpu().numpy()

    fig, ax = plt.subplots()
    for i in range(dataset.num_classes):
        ax.scatter(z[y == i, 0], z[y == i, 1], s=20, color=colors[i])
    ax.axis('off')
    plt.show()
    # This function is called repeatedly during training; close the figure so
    # open figures do not pile up on backends that keep them alive after show().
    plt.close(fig)

# Colour palette for the scatter plot; plot_point looks entries up by class id.
colors = [
    '#ffc0cb',
    '#bada55',
    '#008080',
    '#420420',
    '#7fe5f0',
    '#065535',
    '#ffd700',
]
# Draw the embeddings as they are right now, i.e. before any training step.
plot_point(colors)


In [None]:
# TRAIN FUNCTION
def train():
    """Run one optimisation pass over ``loader``.

    Returns:
        The sum of the per-batch losses divided by ``len(loader)``.
    """
    Node2Vec_model.train()
    batch_losses = []
    for positive_walks, negative_walks in loader:
        optimizer.zero_grad()
        batch_loss = Node2Vec_model.loss(
            positive_walks.to(device),
            negative_walks.to(device),
        )
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(loader)
    
# OBTAIN EMBEDDING REPRESENTATION
NUM_EPOCHS = 100
# Every plot re-fits t-SNE on all node embeddings, so plotting after each epoch
# dominates the runtime. Plot periodically instead; set PLOT_EVERY = 1 to get a
# plot after every epoch again.
PLOT_EVERY = 10
for epoch in range(1, NUM_EPOCHS + 1):
    loss = train()
    print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}')
    # Always plot after the last epoch so the final state is shown.
    if epoch % PLOT_EVERY == 0 or epoch == NUM_EPOCHS:
        plot_point(colors)

In [None]:
# PLOT 2D OF EMBEDDED REPRESENTATION
# Draws the t-SNE scatter plot from the model's current embeddings; when the
# cells are run in order this is the state after the training loop.
plot_point(colors)



In [None]:
# NODE CLASSIFICATION
@torch.no_grad()  # inference only: no autograd graph is needed for the embedding lookup
def test():
    """Score the learned embeddings on the node-classification task.

    The embeddings of the nodes selected by ``data.train_mask`` and their
    labels are passed to ``Node2Vec_model.test`` together with the
    ``data.test_mask`` embeddings and labels; ``max_iter=150`` is forwarded
    to it as a keyword argument.

    Returns:
        Whatever ``Node2Vec_model.test`` returns (printed as the accuracy
        by the caller).
    """
    Node2Vec_model.eval()  # switch the model to evaluation mode
    z = Node2Vec_model()  # called without a batch: embeddings for all nodes
    acc = Node2Vec_model.test(
        z[data.train_mask], data.y[data.train_mask],
        z[data.test_mask], data.y[data.test_mask],
        max_iter=150,
    )
    return acc

# Run the evaluation once and print the value returned by test().
print('Accuracy: ', test())
