# CORA (Basic Model)
### 2-layer GCN

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
import numpy as np

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')



The following is implemented using Danish's GCN model which he sent us earlier in the quarter.

In [2]:
# Define the GCN model
class GCN(nn.Module):
    def __init__(self, in_feats, h_feats, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_feats, h_feats)
        self.conv2 = GCNConv(h_feats, num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        h = self.conv1(x, edge_index)
        h = F.relu(h)
        h = self.conv2(h, edge_index)
        return h

# Load the datasets
cora_dataset = Planetoid(root='/tmp/Cora', name='Cora')
data = cora_dataset[0]
data = data.to(device)

In [3]:
in_feats = data.x.shape[1]
h_feats = 64
num_classes = cora_dataset.num_classes
model = GCN(in_feats, h_feats, num_classes)
model = model.to(device)

def train(model, data, train_mask, labels):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    model.train()
    logits = model(data.cuda())
    
    loss = F.cross_entropy(logits[train_mask], labels[train_mask])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    return loss.item()

### Uncomment the following cell if you want to save embeddings (Wyatt will likely do this, not sure - not my topic)

In [4]:
# def save_embeddings_and_edges(dataset, dataset_name):
#     model.eval()
#     with torch.no_grad():
#         embeddings = model(g, features)
#     print(embeddings.shape)
#     np.save(f'../embeddings/{dataset_name}_embeddings.npy', embeddings.detach().numpy())


#     edge_index = g.edges()
#     np.save(f'../embeddings/{dataset_name}_edge_index.npy', np.vstack((edge_index[0].numpy(), edge_index[1].numpy())))

In [5]:
train(model, data, data.train_mask, data.y)
# save_embeddings_and_edges(cora_dataset, 'cora')

1.9520429372787476

In [6]:
def test():
    # data = cora_dataset[0].to(device)
    model.eval()
    out = model(data)
    pred = out.argmax(dim=1)

    # incorrect_indices = (pred[data.test_mask] != data.y[data.test_mask]).nonzero()

    # print("Incorrect Predictions Indices:", incorrect_indices.flatten().tolist())

    acc = (pred[data.test_mask] == data.y[data.test_mask]).sum().item() / data.test_mask.sum().item()
    return acc

for epoch in range(0, 200):
    loss = train(model, data, data.train_mask, data.y)
    acc = test()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Accuracy: {acc:.4f}')

Epoch: 000, Loss: 1.6675, Accuracy: 0.7710
Epoch: 001, Loss: 1.2989, Accuracy: 0.7700
Epoch: 002, Loss: 0.9228, Accuracy: 0.8100
Epoch: 003, Loss: 0.6364, Accuracy: 0.7260
Epoch: 004, Loss: 0.4531, Accuracy: 0.8070
Epoch: 005, Loss: 0.3454, Accuracy: 0.7190
Epoch: 006, Loss: 0.2493, Accuracy: 0.8050
Epoch: 007, Loss: 0.2012, Accuracy: 0.7330
Epoch: 008, Loss: 0.1447, Accuracy: 0.8000
Epoch: 009, Loss: 0.1277, Accuracy: 0.7510
Epoch: 010, Loss: 0.0936, Accuracy: 0.7960
Epoch: 011, Loss: 0.0875, Accuracy: 0.7550
Epoch: 012, Loss: 0.0656, Accuracy: 0.7910
Epoch: 013, Loss: 0.0631, Accuracy: 0.7550
Epoch: 014, Loss: 0.0486, Accuracy: 0.7920
Epoch: 015, Loss: 0.0476, Accuracy: 0.7620
Epoch: 016, Loss: 0.0372, Accuracy: 0.7940
Epoch: 017, Loss: 0.0370, Accuracy: 0.7610
Epoch: 018, Loss: 0.0291, Accuracy: 0.7950
Epoch: 019, Loss: 0.0292, Accuracy: 0.7690
Epoch: 020, Loss: 0.0232, Accuracy: 0.7980
Epoch: 021, Loss: 0.0234, Accuracy: 0.7720
Epoch: 022, Loss: 0.0187, Accuracy: 0.7930
Epoch: 023,

In [7]:
torch.save(model.state_dict(), 'cora_gt.pt')

The following shows the 10 nodes with the highest degree.

In [8]:
# import networkx as nx
# from dgl import to_networkx
# import matplotlib.pyplot as plt

# G = to_networkx(g)
# pos = nx.spring_layout(G, seed=42)
# cent = nx.degree_centrality(G)
# node_size = list(map(lambda x: x * 500, cent.values()))
# cent_array = np.array(list(cent.values()))
# threshold = sorted(cent_array, reverse=True)[50]
# print("threshold", threshold)
# cent_bin = np.where(cent_array >= threshold, 1, 0.1)
# plt.figure(figsize=(12, 12))
# nodes = nx.draw_networkx_nodes(G, pos, node_size=node_size,
#                                cmap=plt.cm.plasma,
#                                node_color=cent_bin,
#                                nodelist=list(cent.keys()),
#                                alpha=cent_bin)
# edges = nx.draw_networkx_edges(G, pos, width=0.25, alpha=0.3)
# plt.show()

This shows me the node with the highest degree.

In [9]:
# threshold = sorted(cent_array, reverse=True)[0]
# print("threshold", threshold)
# cent_bin = np.where(cent_array >= threshold, 1, 0.1)
# plt.figure(figsize=(12, 12))
# nodes = nx.draw_networkx_nodes(G, pos, node_size=node_size,
#                                cmap=plt.cm.plasma,
#                                node_color=cent_bin,
#                                nodelist=list(cent.keys()),
#                                alpha=cent_bin)
# edges = nx.draw_networkx_edges(G, pos, width=0.25, alpha=0.3)
# plt.show()