# CORA (Basic Model)
### 2-layer GCN

In [146]:
import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

The model below is based on the GCN implementation that Danish sent us earlier in the quarter.

In [147]:
# Define the GCN model
class GCN(nn.Module):
    """Two-layer graph convolutional network that outputs per-node class scores.

    Args:
        in_feats: Size of each input node feature vector.
        h_feats: Width of the hidden representation between the two layers.
        num_classes: Number of scores produced for each node.
    """

    def __init__(self, in_feats, h_feats, num_classes):
        super().__init__()
        self.conv1 = GCNConv(in_feats, h_feats)
        self.conv2 = GCNConv(h_feats, num_classes)

    def forward(self, data):
        """Return the second layer's raw output for every node in ``data``.

        ``data`` must expose ``x`` (node features) and ``edge_index``; both
        convolutions receive the same ``edge_index``. No softmax is applied.
        """
        features, connectivity = data.x, data.edge_index
        hidden = F.relu(self.conv1(features, connectivity))
        return self.conv2(hidden, connectivity)

# Load the Cora Planetoid dataset and place its first graph on the selected device.
cora_dataset = Planetoid(name='Cora', root='/tmp/Cora')
data = cora_dataset[0].to(device)

In [148]:
# Layer sizes: the input width is read from the node-feature matrix, the
# output width from the dataset's class count; the hidden width is fixed.
in_feats = data.x.size(1)
h_feats = 64
num_classes = cora_dataset.num_classes

# Build the network and move it to the same device as the data.
model = GCN(in_feats, h_feats, num_classes).to(device)

def train(model, data, train_mask, labels, optimizer=None, lr=0.01):
    """Run one full-batch optimisation step and return the training loss.

    Args:
        model: Module called as ``model(data)`` that returns one row of class
            scores per node.
        data: Graph object; it is moved to the device of ``model``'s
            parameters with ``data.to(...)`` before the forward pass.
        train_mask: Mask/index selecting the nodes that contribute to the loss.
        labels: Target class for every node; indexed with ``train_mask``.
        optimizer: Optimizer to step. When omitted, an Adam optimizer is
            created on the first call and cached on ``model`` (attribute
            ``_train_optimizer``) so later calls reuse it.
        lr: Learning rate, used only when the Adam optimizer is created here.

    Returns:
        The cross-entropy loss over the masked nodes as a Python float.
    """
    if optimizer is None:
        # Re-creating Adam on every call would throw away its running moment
        # estimates and step count, so build it once and reuse it afterwards.
        optimizer = getattr(model, '_train_optimizer', None)
        if optimizer is None:
            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            model._train_optimizer = optimizer

    # Follow the model's device instead of hard-coding CUDA, so the same code
    # also runs on CPU-only machines.
    model_device = next(model.parameters()).device

    model.train()
    logits = model(data.to(model_device))
    loss = F.cross_entropy(logits[train_mask], labels[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return loss.item()

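### Saving embeddings: the next cell only defines the helper, which is called at the end of the notebook; skip that call if you do not need the files (Wyatt will likely be the one using them, not sure - not my topic)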

In [155]:
def save_embeddings_and_edges(dataset_name):
    """Save the model's per-node outputs and the graph's edge index as ``.npy`` files.

    Runs the module-level ``model`` on the module-level ``data`` in eval mode
    and writes two files under ``../embeddings/`` (created if missing):

    * ``{dataset_name}_embeddings.npy`` - the model output for every node.
    * ``{dataset_name}_edge_index.npy`` - ``data.edge_index``.

    It then prints the label tensor, its length, the type of one label, and
    the fraction of nodes whose arg-max output equals the label. That fraction
    is taken over all nodes, not only a held-out split.

    Args:
        dataset_name: Prefix used for the two output file names.
    """
    output_dir = '../embeddings'
    # np.save does not create missing directories, so make sure it exists.
    os.makedirs(output_dir, exist_ok=True)

    # Use whatever device the model lives on rather than assuming CUDA.
    model_device = next(model.parameters()).device
    graph = data.to(model_device)

    model.eval()
    with torch.no_grad():
        embeddings = model(graph)
    np.save(f'{output_dir}/{dataset_name}_embeddings.npy', embeddings.cpu().numpy())

    # The earlier DGL-style `g.edges()` call has no equivalent here; the graph
    # connectivity is the `edge_index` tensor that the model itself consumes.
    # NOTE(review): assumes a (2, num_edges) layout, matching the old
    # vstack((src, dst)) output - confirm with whoever reads this file.
    np.save(f'{output_dir}/{dataset_name}_edge_index.npy', graph.edge_index.cpu().numpy())

    labels = graph.y
    print(labels)
    print(len(labels))
    print(type(labels[0]))

    # One vectorised comparison instead of a Python loop over every node.
    num_correct = (embeddings.argmax(dim=1) == labels).sum().item()
    print(f"embeddings are correct: {num_correct/len(embeddings)}")

In [156]:
# One optimisation step; the cell displays the loss it returns.
train(model, data, train_mask=data.train_mask, labels=data.y)


8.514948746096707e-10

In [157]:
def test():
    """Return the accuracy of the module-level ``model`` on the test nodes.

    The model is switched to eval mode and run on the module-level ``data``;
    the prediction for each node is the arg-max of its output row.

    Returns:
        Fraction of nodes selected by ``data.test_mask`` whose prediction
        equals ``data.y``, as a Python float.
    """
    model.eval()
    # Inference only: skip autograd bookkeeping so no graph is built per call.
    with torch.no_grad():
        pred = model(data).argmax(dim=1)

    test_mask = data.test_mask
    num_correct = (pred[test_mask] == data.y[test_mask]).sum().item()
    return num_correct / test_mask.sum().item()

# Train for 200 epochs, reporting the training loss and test accuracy after each one.
for epoch in range(200):
    loss = train(model, data, train_mask=data.train_mask, labels=data.y)
    acc = test()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Accuracy: {acc:.4f}')

Epoch: 000, Loss: 0.0000, Accuracy: 0.7550
Epoch: 001, Loss: 0.0000, Accuracy: 0.7550
Epoch: 002, Loss: 0.0000, Accuracy: 0.7550
Epoch: 003, Loss: 0.0000, Accuracy: 0.7550
Epoch: 004, Loss: 0.0000, Accuracy: 0.7550
Epoch: 005, Loss: 0.0000, Accuracy: 0.7550
Epoch: 006, Loss: 0.0000, Accuracy: 0.7550
Epoch: 007, Loss: 0.0000, Accuracy: 0.7550
Epoch: 008, Loss: 0.0000, Accuracy: 0.7550
Epoch: 009, Loss: 0.0000, Accuracy: 0.7550
Epoch: 010, Loss: 0.0000, Accuracy: 0.7550
Epoch: 011, Loss: 0.0000, Accuracy: 0.7550
Epoch: 012, Loss: 0.0000, Accuracy: 0.7550
Epoch: 013, Loss: 0.0000, Accuracy: 0.7550
Epoch: 014, Loss: 0.0000, Accuracy: 0.7550
Epoch: 015, Loss: 0.0000, Accuracy: 0.7550
Epoch: 016, Loss: 0.0000, Accuracy: 0.7550
Epoch: 017, Loss: 0.0000, Accuracy: 0.7550
Epoch: 018, Loss: 0.0000, Accuracy: 0.7550
Epoch: 019, Loss: 0.0000, Accuracy: 0.7550
Epoch: 020, Loss: 0.0000, Accuracy: 0.7550
Epoch: 021, Loss: 0.0000, Accuracy: 0.7550
Epoch: 022, Loss: 0.0000, Accuracy: 0.7550
Epoch: 023,

In [158]:
# Persist the trained parameters (state dict only) next to the notebook.
torch.save(obj=model.state_dict(), f='cora_gt.pt')

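The following cell is currently commented out. When enabled, it highlights the highest-degree nodes (the threshold is taken at index 50 of the degree centralities sorted in descending order, so about 50 nodes rather than 10).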

In [159]:
# import networkx as nx
# from dgl import to_networkx
# import matplotlib.pyplot as plt

# G = to_networkx(g)
# pos = nx.spring_layout(G, seed=42)
# cent = nx.degree_centrality(G)
# node_size = list(map(lambda x: x * 500, cent.values()))
# cent_array = np.array(list(cent.values()))
# threshold = sorted(cent_array, reverse=True)[50]
# print("threshold", threshold)
# cent_bin = np.where(cent_array >= threshold, 1, 0.1)
# plt.figure(figsize=(12, 12))
# nodes = nx.draw_networkx_nodes(G, pos, node_size=node_size,
#                                cmap=plt.cm.plasma,
#                                node_color=cent_bin,
#                                nodelist=list(cent.keys()),
#                                alpha=cent_bin)
# edges = nx.draw_networkx_edges(G, pos, width=0.25, alpha=0.3)
# plt.show()

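The next cell, also commented out, highlights only the node(s) with the highest degree (the threshold is taken at index 0 of the sorted degree centralities).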

In [160]:
# threshold = sorted(cent_array, reverse=True)[0]
# print("threshold", threshold)
# cent_bin = np.where(cent_array >= threshold, 1, 0.1)
# plt.figure(figsize=(12, 12))
# nodes = nx.draw_networkx_nodes(G, pos, node_size=node_size,
#                                cmap=plt.cm.plasma,
#                                node_color=cent_bin,
#                                nodelist=list(cent.keys()),
#                                alpha=cent_bin)
# edges = nx.draw_networkx_edges(G, pos, width=0.25, alpha=0.3)
# plt.show()

In [161]:
# Export the trained model's outputs for Cora.
save_embeddings_and_edges(dataset_name='cora')

tensor([3, 4, 4,  ..., 3, 3, 3], device='cuda:0')
2708
<class 'torch.Tensor'>
embeddings are correct: 0.7603397341211225
