In [1]:
import torch

In [2]:
import os
os.getcwd()

'C:\\Users\\KEARNEY\\Desktop\\새 폴더\\c_project\\LDH\\code\\GCN'

In [3]:
import numpy as np
import scipy.sparse as sp
import torch


def encode_onehot(labels):
    classes = set(labels)
    classes_dict = {c: np.identity(len(classes))[i, :] for i, c in
                    enumerate(classes)}
    labels_onehot = np.array(list(map(classes_dict.get, labels)),
                             dtype=np.int32)
    return labels_onehot


def load_data(path="../code/", dataset="cora"):
    """Load citation network dataset (cora only for now)"""
    print('Loading {} dataset...'.format(dataset))
    path = 'C:/Users/KEARNEY/Desktop/새 폴더/c_project/LDH/code/GCN/'

    idx_features_labels = np.genfromtxt("{}{}.content".format(path, dataset),
                                        dtype=np.dtype(str))
    features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)
    labels = encode_onehot(idx_features_labels[:, -1])

    # build graph
    idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
    idx_map = {j: i for i, j in enumerate(idx)}
    edges_unordered = np.genfromtxt("{}{}.cites".format(path, dataset),
                                    dtype=np.int32)
    edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
                     dtype=np.int32).reshape(edges_unordered.shape)
    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(labels.shape[0], labels.shape[0]),
                        dtype=np.float32)

    # build symmetric adjacency matrix
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

    features = normalize(features)
    adj = normalize(adj + sp.eye(adj.shape[0]))

    idx_train = range(140)
    idx_val = range(200, 500)
    idx_test = range(500, 1500)

    features = torch.FloatTensor(np.array(features.todense()))
    labels = torch.LongTensor(np.where(labels)[1])
    adj = sparse_mx_to_torch_sparse_tensor(adj)

    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)

    return adj, features, labels, idx_train, idx_val, idx_test


def normalize(mx):
    """Row-normalize sparse matrix"""
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)
    return mx


def accuracy(output, labels):
    preds = output.max(1)[1].type_as(labels)
    correct = preds.eq(labels).double()
    correct = correct.sum()
    return correct / len(labels)


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)

In [4]:
A, features, labels, idx_train, idx_val, idx_test = load_data()

Loading cora dataset...


  return torch.sparse.FloatTensor(indices, values, shape)


In [5]:
import torch.nn as nn
import torch.nn.functional as F

In [17]:
class GCN_layer(nn.Module):
    def __init__(self, in_features, out_features, A):
        super(GCN_layer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.A = A
        self.fc = nn.Linear(in_features, out_features)
        
    def forward(self, X):
        return self.fc(torch.spmm(self.A, X)) #이웃 정보 종합

class GCN(nn.Module):
    def __init__(self, num_feature, num_class, A):
        super(GCN, self).__init__()

        self.feature_extractor = nn.Sequential(
                                    GCN_layer(num_feature, 16, A),
                                    nn.ReLU(),
                                    GCN_layer(16, num_class, A)
                                )
        
    def forward(self, X):
        return self.feature_extractor(X)

In [22]:
def train(model, Loss, optimizer, num_epochs):
    train_loss_arr = []
    test_loss_arr = []

    best_test_loss = 99999999
    best_ACC=1000
    final_ACC=0

    early_stop, early_stop_max = 0., 10.

    for epoch in range(num_epochs):

        # Forward Pass
        model.train()
        output = model(features)
        train_loss = criterion(output[idx_train], labels[idx_train])

        # Backward and optimize
        train_loss.backward()
        optimizer.step()
        
        train_loss_arr.append(train_loss.data)
    
        if epoch % 10 == 0:
            model.eval()
        
            output = model(features)
            val_loss = criterion(output[idx_val], labels[idx_val])
            test_loss = criterion(output[idx_test], labels[idx_test])
        
            val_acc = accuracy(output[idx_val], labels[idx_val])
            test_acc = accuracy(output[idx_test], labels[idx_test])
        
            test_loss_arr.append(test_loss)
        
            if best_ACC > val_acc:
                best_ACC = val_acc
                early_stop = 0
                final_ACC = test_acc
                print('Epoch [{}/{}], Train Loss: {:.4f}, Test Loss: {:.4f}, Test ACC: {:.4f} *'.format(epoch, 100, train_loss.data, test_loss, test_acc))
            else:
                early_stop += 1

                print('Epoch [{}/{}], Train Loss: {:.4f}, Test Loss: {:.4f}, Test ACC: {:.4f}'.format(epoch, 100, train_loss.data, test_loss, test_acc))

        if early_stop >= early_stop_max:
            break
        
    print("Final Accuracy::", final_ACC)

In [23]:
class FCN(nn.Module):
    def __init__(self, num_feature, num_class):
        super(FCN, self).__init__()

        self.feature_extractor = nn.Sequential(
                                    nn.Linear(num_feature, 16),
                                    nn.ReLU(),
                                    nn.Linear(16, num_class)
                                )

    def forward(self, x):
        return self.feature_extractor(x)

In [24]:
# FCN 학습 돌려서 epoch에 따른 Loss 확인
import torch.optim as optim

model = FCN(features.size(1) , labels.unique().size(0))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.1, weight_decay=0.0001)

train(model, criterion, optimizer, 1000)

print("finish FCN")
# GCN 학습 돌려서 epoch에 따른 Loss 확인
model = GCN(features.size(1) , labels.unique().size(0), A)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.1, weight_decay=0.0001)

train(model, criterion, optimizer, 1000)

Epoch [0/100], Train Loss: 1.9121, Test Loss: 1.8885, Test ACC: 0.3090 *
Epoch [10/100], Train Loss: 0.7556, Test Loss: 1.9785, Test ACC: 0.2050 *
Epoch [20/100], Train Loss: 0.5313, Test Loss: 4.8541, Test ACC: 0.3710
Epoch [30/100], Train Loss: 0.1605, Test Loss: 7.8023, Test ACC: 0.3500
Epoch [40/100], Train Loss: 0.6851, Test Loss: 20.5386, Test ACC: 0.1970 *
Epoch [50/100], Train Loss: 0.3729, Test Loss: 16.3366, Test ACC: 0.4040
Epoch [60/100], Train Loss: 1.4503, Test Loss: 28.9251, Test ACC: 0.4020
Epoch [70/100], Train Loss: 2.2472, Test Loss: 34.7367, Test ACC: 0.3370
Epoch [80/100], Train Loss: 0.6101, Test Loss: 23.8472, Test ACC: 0.3400
Epoch [90/100], Train Loss: 0.3286, Test Loss: 23.2772, Test ACC: 0.3220
Epoch [100/100], Train Loss: 3.6857, Test Loss: 43.5995, Test ACC: 0.3560
Epoch [110/100], Train Loss: 2.4076, Test Loss: 50.8979, Test ACC: 0.3670
Epoch [120/100], Train Loss: 1.2418, Test Loss: 41.8214, Test ACC: 0.3660
Epoch [130/100], Train Loss: 3.4558, Test Loss: