# import library required

In [3]:
! pip install dgl

Collecting dgl
[?25l  Downloading https://files.pythonhosted.org/packages/2b/b6/5450e9bb80842ab58a6ee8c0da8c7d738465703bceb576bd7e9782c65391/dgl-0.6.0-cp37-cp37m-manylinux1_x86_64.whl (4.2MB)
[K     |████████████████████████████████| 4.2MB 6.9MB/s 
Installing collected packages: dgl
Successfully installed dgl-0.6.0


In [4]:
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl  # Deep Graph Library
from dgl.data import CoraGraphDataset
from dgl.nn.pytorch.conv import SAGEConv
from sklearn.metrics import f1_score

DGL backend not selected or invalid.  Assuming PyTorch for now.
Using backend: pytorch


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


# load data

In [5]:
G = CoraGraphDataset()
num_classes = G.num_classes
G = G[0]
features = G.ndata['feat']
input_feature_dim = features.shape[1]
labels = G.ndata['label']
train_mask = G.ndata['train_mask']
test_mask = G.ndata['test_mask']

Downloading /root/.dgl/cora_v2.zip from https://data.dgl.ai/dataset/cora_v2.zip...
Extracting file to /root/.dgl/cora_v2
Finished data loading and preprocessing.
  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done saving data into cached files.


# modeling

In [15]:
# 하이퍼파라미터 초기화
dropout_p = 0.5
learning_rate = 1e-2
num_epochs = 50
num_hidden_dim = 128
num_layers = 2
weight_decay = 5e-4
aggregator_type = "gcn"

In [11]:
class GraphSAGE(nn.Module):
    def __init__(self, graph, in_feat_dim, num_hidden_dim, num_classes, num_layers, activation_fun, dropout_p, aggregator_type):
        super(GraphSAGE, self).__init__()
        self.layers = nn.ModuleList()
        self.graph = graph
        self.layers.append(SAGEConv(in_feat_dim, num_hidden_dim, aggregator_type, dropout_p, activation_fun))

        for i in range(num_layers):
            self.layers.append(SAGEConv(num_hidden_dim, num_hidden_dim, aggregator_type, dropout_p, activation_fun))
        
        self.layers.append(SAGEConv(num_hidden_dim, num_classes, aggregator_type, dropout_p, activation=None))
    
    def forward(self, features):
        x = features
        for layer in self.layers:
            x = layer(self.graph, x)
        return x


In [7]:
# 모델 학습 결과를 평가할 함수
def evaluate_train(model, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)

def evaluate_test(model, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = torch.max(logits, dim=1)
        macro_f1 = f1_score(labels, indices, average = 'macro')
        correct = torch.sum(indices == labels)
        return correct.item() * 1.0 / len(labels), macro_f1

In [19]:
def train(model, loss_fun, features, labels, train_mask, optim, num_epochs):
    running_time = []
    for epoch in range(num_epochs):
        model.train()
        start_t = time.time()
        logits = model(features)
        loss = loss_fun(logits[train_mask], labels[train_mask])
        optim.zero_grad()
        loss.backward()
        optim.step()
        running_time.append(time.time()-start_t)
        acc = evaluate_train(model, features, labels,train_mask)
        print(f"Epoch: {epoch}\tTime: {running_time[epoch]:.2f}\tLoss: {loss.item(): .4f}\tAccuracy: {acc}")

def test(model, features, labels, test_mask):
    acc, macro_f1 = evaluate_test(model, features, labels, test_mask)
    print(f"Test Acc: {acc: .4f}")
    print(f"Test Macro-f1: {macro_f1: .4f}")

In [12]:
model = GraphSAGE(G, input_feature_dim, num_hidden_dim, num_classes, num_layers, F.relu, dropout_p, aggregator_type)
print(model)

GraphSAGE(
  (layers): ModuleList(
    (0): SAGEConv(
      (feat_drop): Dropout(p=0.5, inplace=False)
      (fc_neigh): Linear(in_features=1433, out_features=128, bias=True)
    )
    (1): SAGEConv(
      (feat_drop): Dropout(p=0.5, inplace=False)
      (fc_neigh): Linear(in_features=128, out_features=128, bias=True)
    )
    (2): SAGEConv(
      (feat_drop): Dropout(p=0.5, inplace=False)
      (fc_neigh): Linear(in_features=128, out_features=128, bias=True)
    )
    (3): SAGEConv(
      (feat_drop): Dropout(p=0.5, inplace=False)
      (fc_neigh): Linear(in_features=128, out_features=7, bias=True)
    )
  )
)


In [13]:
loss_fun = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay=weight_decay)

In [20]:
train(model, loss_fun, features, labels, train_mask, optim, num_epochs)

Epoch: 0	Time: 0.15	Loss:  1.9969	Accuracy: 0.4357142857142857
Epoch: 1	Time: 0.13	Loss:  1.7909	Accuracy: 0.42142857142857143
Epoch: 2	Time: 0.13	Loss:  1.6590	Accuracy: 0.5214285714285715
Epoch: 3	Time: 0.12	Loss:  1.4744	Accuracy: 0.8928571428571429
Epoch: 4	Time: 0.13	Loss:  1.2051	Accuracy: 0.8928571428571429
Epoch: 5	Time: 0.13	Loss:  0.8743	Accuracy: 0.9214285714285714
Epoch: 6	Time: 0.13	Loss:  0.6431	Accuracy: 0.9214285714285714
Epoch: 7	Time: 0.13	Loss:  0.4467	Accuracy: 0.95
Epoch: 8	Time: 0.13	Loss:  0.2938	Accuracy: 0.9642857142857143
Epoch: 9	Time: 0.13	Loss:  0.2188	Accuracy: 0.9642857142857143
Epoch: 10	Time: 0.13	Loss:  0.1483	Accuracy: 0.9785714285714285
Epoch: 11	Time: 0.13	Loss:  0.1259	Accuracy: 0.9857142857142858
Epoch: 12	Time: 0.13	Loss:  0.0831	Accuracy: 0.9928571428571429
Epoch: 13	Time: 0.13	Loss:  0.0832	Accuracy: 0.9928571428571429
Epoch: 14	Time: 0.13	Loss:  0.0347	Accuracy: 1.0
Epoch: 15	Time: 0.13	Loss:  0.0709	Accuracy: 0.9928571428571429
Epoch: 16	Time

In [21]:
test(model, features, labels, test_mask)

Test Acc:  0.7920
Test Macro-f1:  0.7799
