<a href="https://colab.research.google.com/github/vagmin27/DeepLearning/blob/main/Pytorch_geometric.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
# Display the installed PyTorch build string; the wheel index URLs in the
# installation cell below are keyed on this version.
torch.__version__

'1.10.0+cu111'

In [None]:
# Installation guide
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu111.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-1.10.0+cu111.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

[K     |████████████████████████████████| 7.9 MB 2.7 MB/s 
[K     |████████████████████████████████| 3.5 MB 2.8 MB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch_geometric.nn
from torch_geometric.datasets import Planetoid
import torch_geometric.utils
from torch_geometric.transforms import NormalizeFeatures

from sklearn.manifold import TSNE
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt

# Graph Datasets
<h2><b>Citation Networks</b></h2>

<h3>1.   Cora</h3>
<h3>2.   Citeseer</h3>
<h3>3.   Pubmed</h3>

<h3>
Nodes denote documents, and edges correspond to citations between them. Each node's feature vector is the bag-of-words representation of the document, and each node belongs to one of the academic topics.
</h3>

In [None]:
# load datasets
# Pubmed graph from the Planetoid collection, stored under data/Planetoid;
# NormalizeFeatures() is registered as the dataset's transform.
dataset = Planetoid(
    root='data/Planetoid',
    name='Pubmed',
    transform=NormalizeFeatures(),
)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.test.index
Processing...
Done!


In [None]:
# Dataset-level summary: graph count, class count and feature dimensions.
print(dataset)
print(f"number of graphs:\t\t {len(dataset)}")
print(f"number of classes:\t\t {dataset.num_classes}")
print(f"number of node features:\t {dataset.num_node_features}")
print(f"number of edge features:\t {dataset.num_edge_features}")

Pubmed()
number of graphs:		 1
number of classes:		 3
number of node features:	 500
number of edge features:	 0


In [None]:
# Take the single graph out of the dataset and print its summary.
# The graph is obtained by indexing (dataset[0]) rather than through the
# dataset's internal `data` attribute, so the object printed here is exactly
# the one every later cell works with.
data = dataset[0]
print(data)

Data(x=[19717, 500], edge_index=[2, 88648], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717])


In [None]:
# Per-graph statistics for the graph held in `data`.
print("edge list:\t\t", data.edge_index.shape)
print("number of nodes:\t\t", data.num_nodes)
print("training mask:\t\t", data.train_mask.shape)
print("validation mask:\t\t", data.val_mask.shape)
print("test mask:\t\t", data.test_mask.shape)
print("feature matrix:\t\t", data.x.shape)
print("label set:\t\t", data.y.shape)
# Three decimals: with two, the labelled fraction of this graph rounds to 0.00.
print(f'Training node label rate\t\t: {int(data.train_mask.sum()) / data.num_nodes:.3f}')
print(f'Has isolated nodes\t\t: {data.has_isolated_nodes()}')
print(f'Has self-loops\t\t: {data.has_self_loops()}')
print(f'Is undirected\t\t: {data.is_undirected()}')

edge list:		 torch.Size([2, 88648])
number of nodes:		 19717
training mask:		 torch.Size([19717])
valiadtion mask:		 torch.Size([19717])
test mask:		 torch.Size([19717])
feature matrix:		 torch.Size([19717, 500])
label set:		 torch.Size([19717])
Training node label rate		: 0.00
Has isolated nodes		: False
Has self-loops		: False
Is undirected		: True


In [None]:
# COO format: a [2, num_edges] tensor; row 0 holds the source node of each
# edge and row 1 the target node.
edge_index = data.edge_index
print(data.num_edges, edge_index, sep="\n")


88648
tensor([[    0,     0,     0,  ..., 19714, 19715, 19716],
        [ 1378,  1544,  6092,  ..., 12278,  4284, 16030]])


In [None]:
# graph data visualization
num_nodes = data.num_nodes
num_edges = data.num_edges

# Convert the PyG graph to NetworkX straight from its edge list.
# A dense num_nodes x num_nodes float64 adjacency matrix would need roughly
# 3 GB for this graph and a Python loop over every edge, and
# nx.from_numpy_matrix is not available in NetworkX >= 3.0.
# to_undirected=True yields an undirected nx.Graph with one edge per
# (source, target) / (target, source) pair.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)
pos = nx.spring_layout(G)
nx.draw(G, pos, node_size = 10, node_color = 'red')

In [None]:
def visualize(h, color):
    """Scatter-plot a 2-D t-SNE projection of the rows of ``h``.

    Args:
        h: Tensor of embeddings; it is detached, moved to the CPU and
            converted to a NumPy array before being passed to t-SNE.
        color: Per-point values forwarded to matplotlib's ``c=`` argument
            and mapped through the "Set2" colormap.
    """
    embedding_2d = TSNE(n_components=2).fit_transform(h.detach().cpu().numpy())
    xs, ys = embedding_2d[:, 0], embedding_2d[:, 1]

    plt.figure(figsize=(10,10))
    # Hide tick marks on both axes; t-SNE coordinates carry no units.
    for clear_ticks in (plt.xticks, plt.yticks):
        clear_ticks([])

    plt.scatter(xs, ys, s=70, c=color, cmap="Set2")
    plt.show()

In [None]:
from torch_geometric.nn import GCNConv
# from torch_geometric.nn import GATConv


class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    Args:
        hidden_channels: Width of the hidden representation between the two
            GCNConv layers.
        in_channels: Number of input features per node. When omitted, falls
            back to ``dataset.num_features`` of the notebook-level ``dataset``.
        out_channels: Number of output classes. When omitted, falls back to
            ``dataset.num_classes`` of the notebook-level ``dataset``.

    Passing ``in_channels`` / ``out_channels`` explicitly makes the model
    independent of whichever ``dataset`` happens to be loaded in the kernel.
    """

    def __init__(self, hidden_channels, in_channels=None, out_channels=None):
        super().__init__()
        # Fixed seed so the layers are initialised identically on every
        # construction. This reseeds PyTorch's global RNG as a side effect.
        torch.manual_seed(1234567)
        if in_channels is None:
            in_channels = dataset.num_features
        if out_channels is None:
            out_channels = dataset.num_classes
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Return the second convolution's output for every node.

        Args:
            x: Node feature matrix passed to the first GCNConv layer.
            edge_index: Graph connectivity passed to both GCNConv layers.

        Returns:
            The raw output of ``conv2``; no softmax is applied here.
        """
        x = self.conv1(x, edge_index)
        x = x.relu()
        # Dropout is active only in training mode (self.training is True).
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return x

# Build a GCN with a 16-channel hidden layer and print its layer summary.
model = GCN(hidden_channels=16)
print(model)

GCN(
  (conv1): GCNConv(1433, 16)
  (conv2): GCNConv(16, 7)
)


In [None]:
# Embed the nodes with a freshly constructed (untrained) model and plot them.
model = GCN(hidden_channels=16)
model.eval()

# Inference only: no gradients are needed, so skip autograd bookkeeping.
with torch.no_grad():
    out = model(data.x, data.edge_index)
visualize(out, color=data.y)

In [None]:
# Fresh model, plus the optimizer and loss that train() relies on.
model = GCN(hidden_channels=16)
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)
criterion = nn.CrossEntropyLoss()

def train():
    """Run one full-graph optimisation step and return the training loss.

    Uses the notebook-level ``model``, ``optimizer``, ``criterion`` and
    ``data``. The loss is computed only on nodes selected by
    ``data.train_mask`` and is returned as the tensor produced by
    ``criterion``.
    """
    model.train()
    optimizer.zero_grad()  # Reset gradients left over from the previous step.

    logits = model(data.x, data.edge_index)  # Forward pass over the whole graph.
    train_mask = data.train_mask
    loss = criterion(logits[train_mask], data.y[train_mask])

    loss.backward()  # Back-propagate.
    optimizer.step()  # Apply the parameter update.
    return loss

def test():
    """Evaluate the notebook-level ``model`` on the test split.

    Returns:
        The fraction of nodes selected by ``data.test_mask`` whose predicted
        class equals the label in ``data.y``, as a Python float.
    """
    model.eval()
    # Evaluation needs no gradients; no_grad avoids building the autograd
    # graph for the forward pass.
    with torch.no_grad():
        out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)  # Use the class with the highest score.
    test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.
    test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Derive ratio of correct predictions.
    return test_acc


# Train for 100 epochs (epoch counter runs 1..100), printing the loss
# returned by train() after every step.
for epoch in range(1, 101):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

In [None]:
# Evaluate the current model on the test split and report the accuracy.
test_acc = test()
print(f'Test Accuracy: {test_acc:.4f}')

Test Accuracy: 0.8150


In [None]:
# Plot the model's output embeddings for all nodes, coloured by label.
model.eval()

# Inference only: no gradients are needed, so skip autograd bookkeeping.
with torch.no_grad():
    out = model(data.x, data.edge_index)
visualize(out, color=data.y)