### Appendix (Code)

### Load data

In [43]:
import scipy.sparse as sp
import numpy as np
import json
adj = sp.load_npz(r'C:\Users\user\Documents\JupyterWork\CSE881_DM\Project\data_2023/adj.npz')
feat  = np.load(r'C:\Users\user\Documents\JupyterWork\CSE881_DM\Project\data_2023/features.npy')
labels = np.load(r'C:\Users\user\Documents\JupyterWork\CSE881_DM\Project\data_2023/labels.npy')
splits = json.load(open(r'C:\Users\user\Documents\JupyterWork\CSE881_DM\Project\data_2023/splits.json'))
idx_train, idx_test = splits['idx_train'], splits['idx_test']

In [44]:
from torch_geometric.utils import from_scipy_sparse_matrix

In [45]:
edge_index = from_scipy_sparse_matrix(adj)

In [46]:
edge_index

(tensor([[   0,    0,    0,  ..., 2478, 2478, 2479],
         [1084, 1104, 1288,  ...,  931,  933,  999]]),
 tensor([1., 1., 1.,  ..., 1., 1., 1.]))

In [47]:
len(splits['idx_train']), len(splits['idx_test'])

(496, 1984)

In [48]:
import numpy as np
import torch
from torch_geometric.data import Data

x = torch.tensor(feat, dtype=torch.float)
y = torch.zeros(adj.shape[0], dtype=torch.long)
y[idx_train] = torch.tensor(labels, dtype=torch.long)

edge_index = torch.tensor(adj.nonzero(), dtype=torch.long)
train_mask = torch.zeros(x.shape[0], dtype=torch.bool)
test_mask = torch.zeros(x.shape[0], dtype=torch.bool)

train_mask[idx_train] = True
test_mask[idx_test] = True

proj_data = Data(x=x, edge_index=edge_index, y=y, train_mask=train_mask, test_mask=test_mask)

# proj_data = Data(x=x, edge_index=edge_index, y=y, train_mask=train_mask, test_mask=test_mask)

In [49]:
adj.shape[0]

2480

In [50]:
# Gather some statistics about the graph.
print(f'Number of nodes: {proj_data.num_nodes}')
print(f'Number of edges: {proj_data.num_edges}')
print(f'Average node degree: {proj_data.num_edges / proj_data.num_nodes: .2f}')
print(f'Number of training nodes: {proj_data.train_mask.sum()}')
print(f'Number of test nodes: {proj_data.test_mask.sum()}')
print(f'Training node label rate: {proj_data.train_mask.sum()}')

Number of nodes: 2480
Number of edges: 10100
Average node degree:  4.07
Number of training nodes: 496
Number of test nodes: 1984
Training node label rate: 496


In [51]:
adj.shape

(2480, 2480)

In [52]:
edge_index

tensor([[   0,    0,    0,  ..., 2478, 2478, 2479],
        [1084, 1104, 1288,  ...,  931,  933,  999]])

In [53]:
# Add validation data later 

proj_data

Data(x=[2480, 1390], edge_index=[2, 10100], y=[2480], train_mask=[2480], test_mask=[2480])

### Prediction Model

In [54]:
import torch
import torch.nn.functional as F
from torch.nn import Linear
import torch_geometric
from torch_geometric.nn import GCNConv


### Baseline Model

In [55]:
class GCN(torch.nn.Module):
    def __init__(self, num_node_features, num_hidden, num_classes):
        super().__init__()
        self.conv1 = GCNConv(num_node_features, num_hidden)
        self.conv2 = GCNConv(num_hidden, num_hidden)
        self.conv3 = GCNConv(num_hidden, num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
#         x = F.relu(x) # activation function
        x = torch.sigmoid(x)
        x = F.dropout(x, training=self.training) # to avoid overfitting
        
        x = self.conv2(x, edge_index)
#         x = F.relu(x) # activation function
        x = torch.sigmoid(x)
        x = F.dropout(x, training=self.training) # to avoid overfitting
        
        x = self.conv3(x, edge_index)

        return F.log_softmax(x, dim=1)

In [56]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

proj_data = proj_data.to(device)

In [57]:
model = GCN(num_node_features=proj_data.x.shape[1], 
            num_hidden=32,
            num_classes=(proj_data.y.max()+1).item()
           ).to(device)

# optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-3)
# optimizer = torch.optim.RMSprop(model.parameters(), lr=0.01, alpha=0.9, weight_decay=0.0005) #12.85
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.01, alpha=0.8, weight_decay=0.0005) #12.05
# optimizer = torch.optim.RMSprop(model.parameters(), lr=0.1, alpha=0.5, weight_decay=0.001)

model.train()

for epoch in range(200):
    optimizer.zero_grad()
    out = model(proj_data)
    loss = F.nll_loss(out[proj_data.train_mask], proj_data.y[proj_data.train_mask])
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print('Epoch {0}: {1}'.format(epoch, loss.item()))

Epoch 0: 2.306647777557373
Epoch 10: 1.737287998199463
Epoch 20: 1.4834630489349365
Epoch 30: 1.1442533731460571
Epoch 40: 0.9501225352287292
Epoch 50: 0.735469400882721
Epoch 60: 0.6281275153160095
Epoch 70: 0.5586093664169312
Epoch 80: 0.4976198673248291
Epoch 90: 0.4811908006668091
Epoch 100: 0.4073542356491089
Epoch 110: 0.3845098912715912
Epoch 120: 0.3798958361148834
Epoch 130: 0.3433077335357666
Epoch 140: 0.27892106771469116
Epoch 150: 0.28732556104660034
Epoch 160: 0.312401682138443
Epoch 170: 0.29221367835998535
Epoch 180: 0.268650621175766
Epoch 190: 0.25573262572288513


In [58]:
model.eval()
pred = model(proj_data).argmax(dim=1)
correct = (pred[proj_data.train_mask] == proj_data.y[proj_data.train_mask]).sum()
acc = int(correct) / int(proj_data.train_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.9657


---------------
### Grid Search

In [59]:
class GCN(torch.nn.Module):
    def __init__(self, num_node_features, num_hidden, num_classes):
        super().__init__()
        self.conv1 = GCNConv(num_node_features, num_hidden)
        self.conv2 = GCNConv(num_hidden, num_hidden)
        self.conv3 = GCNConv(num_hidden, num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
#         x = F.relu(x) # activation function
        x = torch.sigmoid(x)
        x = F.dropout(x, training=self.training) # to avoid overfitting
        
        x = self.conv2(x, edge_index)
#         x = F.relu(x) # activation function
        x = torch.sigmoid(x)
        x = F.dropout(x, training=self.training) # to avoid overfitting
        
        x = self.conv3(x, edge_index)

        return F.log_softmax(x, dim=1)

In [60]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

proj_data = proj_data.to(device)

In [61]:
import itertools

params = {
    'num_hidden': [16, 32, 64, 128],
    'lr': [0.003, 0.002, 0.001, 0.04, 0.03, 0.02, 0.01, 0.1],
    'weight_decay': [0.0005, 0.0001, 0.001, 0.01]
}

param_combinations = list(itertools.product(*params.values()))

best_accuracy = 0
for combination in param_combinations:
    num_hidden, lr, weight_decay = combination

    model = GCN(num_node_features=proj_data.x.shape[1], 
                num_hidden=num_hidden,
                num_classes=(proj_data.y.max()+1).item()
               ).to(device)
    optimizer = torch.optim.RMSprop(model.parameters(), lr=lr, alpha=0.8, weight_decay=weight_decay)

    model.train()
    for epoch in range(200):
        optimizer.zero_grad()
        out = model(proj_data)
        loss = F.nll_loss(out[proj_data.train_mask], proj_data.y[proj_data.train_mask])
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        logits = model(proj_data)
        preds = logits.argmax(dim=1)
        accuracy = (preds[proj_data.train_mask] == proj_data.y[proj_data.train_mask]).float().mean().item()

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = {
            'num_hidden': num_hidden,
            'lr': lr,
            'weight_decay': weight_decay
        }
        best_preds = preds

print('Best accuracy:', best_accuracy)
print('Best hyperparameters:', best_params)

# print predictions
best_preds = best_preds[proj_data.train_mask]
correct = (best_preds == proj_data.y[proj_data.train_mask]).sum()
acc = int(correct) / int(proj_data.train_mask.sum())
print(f'Train accuracy: {acc:.4f}')

Best accuracy: 0.9959677457809448
Best hyperparameters: {'num_hidden': 64, 'lr': 0.04, 'weight_decay': 0.0001}
Train accuracy: 0.9960


### Using the Best Hyperparameters from the Previous Step

In [62]:
# get best hyperparameters
num_hidden = best_params['num_hidden']
lr = best_params['lr']
weight_decay = best_params['weight_decay']

# create model with best hyperparameters
best_model = GCN(num_node_features=proj_data.x.shape[1], 
                 num_hidden=num_hidden,
                 num_classes=(proj_data.y.max()+1).item()
                ).to(device)
optimizer = torch.optim.RMSprop(best_model.parameters(), lr=lr, alpha=0.8, weight_decay=weight_decay)

# train model with best hyperparameters
best_model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = best_model(proj_data)
    loss = F.nll_loss(out[proj_data.train_mask], proj_data.y[proj_data.train_mask])
    loss.backward()
    optimizer.step()


In [63]:
best_model.eval()
pred = best_model(proj_data).argmax(dim=1)
correct = (pred[proj_data.train_mask] == proj_data.y[proj_data.train_mask]).sum()
acc = int(correct) / int(proj_data.train_mask.sum())
print(f'Accuracy: {acc:.4f}')


Accuracy: 0.9778


## How to submit the result

In [66]:
pred[idx_test]

tensor([6, 2, 6,  ..., 1, 1, 5])

In [67]:
preds = pred[idx_test]
np.savetxt('submission_9778.txt', preds, fmt='%d')
