In [1]:
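# Google Colab setup: install the PyTorch Geometric packages.
# NOTE(review): none of these packages is imported by the cells shown below;
# confirm each install is actually required, and pin versions for reproducibility.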
!pip install torch_geometric
!pip install torch_scatter
!pip install torch_sparse
!pip install torch-sparse-old

Collecting torch_geometric
  Downloading torch_geometric-2.0.3.tar.gz (370 kB)
[K     |████████████████████████████████| 370 kB 5.3 MB/s 
Collecting rdflib
  Downloading rdflib-6.1.1-py3-none-any.whl (482 kB)
[K     |████████████████████████████████| 482 kB 41.0 MB/s 
Collecting yacs
  Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Collecting isodate
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[K     |████████████████████████████████| 41 kB 505 kB/s 
Building wheels for collected packages: torch-geometric
  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone
  Created wheel for torch-geometric: filename=torch_geometric-2.0.3-py3-none-any.whl size=581968 sha256=80f0875ada7714700d6f859497f133197614c8c6d0348a0262c68ac9c2614df8
  Stored in directory: /root/.cache/pip/wheels/c3/2a/58/87ce0508964d4def1aafb92750c4f3ac77038efd1b9a89dcf5
Successfully built torch-geometric
Installing collected packages: isodate, yacs, rdflib, torch-geometric
Successfully installed

In [2]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import math
import pdb
import pickle
from matplotlib import pyplot as plt

In [3]:
# Helper functions for initialising model parameters.
def uniform(size, tensor):
    """Fill ``tensor`` in place with samples from U(-1/sqrt(size), 1/sqrt(size)).

    Args:
        size: Number the bound is derived from (``bound = 1 / sqrt(size)``).
        tensor: Tensor or parameter to initialise; ``None`` is ignored.
    """
    limit = 1.0 / math.sqrt(size)
    if tensor is None:
        return
    tensor.data.uniform_(-limit, limit)


def kaiming_uniform(tensor, fan, a):
    """Kaiming (He) uniform initialisation, intended for ReLU-style layers.

    Fills ``tensor`` in place with samples from U(-b, b) where
    ``b = sqrt(6 / ((1 + a**2) * fan))``.

    Args:
        tensor: Tensor or parameter to initialise; ``None`` is ignored.
        fan: Fan value used to scale the bound.
        a: Negative-slope term of the rectifier (0 for a plain ReLU).
    """
    scale = (1 + a**2) * fan
    limit = math.sqrt(6 / scale)
    if tensor is None:
        return
    tensor.data.uniform_(-limit, limit)


def glorot(tensor):
    """Glorot (Xavier) uniform initialisation, applied in place.

    Fills ``tensor`` with samples from U(-b, b) where
    ``b = sqrt(6 / (tensor.size(-2) + tensor.size(-1)))``.

    Args:
        tensor: Tensor or parameter with at least two dimensions, or ``None``.
            ``None`` is ignored, like in the other initialisers of this cell.
    """
    # The guard must come first: reading tensor.size() on None would raise
    # before the None check ever had a chance to run.
    if tensor is None:
        return
    bound = math.sqrt(6.0 / (tensor.size(-2) + tensor.size(-1)))
    tensor.data.uniform_(-bound, bound)


def zeros(tensor):
    """Overwrite every element of ``tensor`` with 0 in place; ``None`` is a no-op."""
    if tensor is None:
        return
    tensor.data.fill_(0)


def ones(tensor):
    """Overwrite every element of ``tensor`` with 1 in place; ``None`` is a no-op."""
    if tensor is None:
        return
    tensor.data.fill_(1)


def reset(nn):
    """Re-initialise a module by calling ``reset_parameters()`` where available.

    If ``nn`` exposes ``children()`` and has at least one child, every direct
    child that defines ``reset_parameters`` is reset (``nn`` itself is not).
    Otherwise ``nn`` itself is reset, provided it defines ``reset_parameters``.
    ``None`` is ignored.

    Note: the parameter name shadows the ``torch.nn`` alias inside this
    function only.
    """
    if nn is None:
        return

    has_children = hasattr(nn, 'children') and len(list(nn.children())) > 0
    targets = nn.children() if has_children else [nn]
    for module in targets:
        if hasattr(module, 'reset_parameters'):
            module.reset_parameters()

In [4]:
# The main Graph Transformer Network (GTN) model and its building blocks.
class GTN(nn.Module):
    """Graph Transformer Network for node classification.

    A stack of ``num_layers`` GTLayers turns the stack of input adjacency
    matrices into ``num_channels`` composite adjacency matrices (one per
    channel of the 1x1 convolutions). One GCN step, sharing a single weight
    matrix, is then applied to the node features on every channel; the
    per-channel outputs are concatenated and passed through two linear layers
    to produce class logits for the requested nodes.

    Args:
        num_edge: Number of adjacency matrices in the input stack (the input
            channel count of every GTLayer).
        num_channels: Number of output channels of every GTLayer.
        w_in: Input node-feature dimension.
        w_out: Hidden dimension produced by the GCN step.
        num_class: Number of output classes.
        num_layers: Number of stacked GTLayers.
        norm: Stored as ``self.is_norm``. NOTE(review): nothing in this class
            reads it; normalisation between layers is always applied.
    """

    def __init__(self, num_edge, num_channels, w_in, w_out, num_class,num_layers,norm):
        super(GTN, self).__init__()
        self.num_edge = num_edge
        self.num_channels = num_channels
        self.w_in = w_in
        self.w_out = w_out
        self.num_class = num_class
        self.num_layers = num_layers
        self.is_norm = norm
        # Only the first layer builds two candidate matrices; later layers
        # combine the previous result with one new candidate.
        self.layers = nn.ModuleList(
            [GTLayer(num_edge, num_channels, first=(i == 0)) for i in range(num_layers)]
        )
        self.weight = nn.Parameter(torch.Tensor(w_in, w_out))
        # NOTE(review): `bias` is created and zero-initialised but gcn_conv
        # never adds it. It is kept so the parameter set stays unchanged.
        self.bias = nn.Parameter(torch.Tensor(w_out))
        self.loss = nn.CrossEntropyLoss()
        self.linear1 = nn.Linear(self.w_out*self.num_channels, self.w_out)
        self.linear2 = nn.Linear(self.w_out, self.num_class)
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-initialise the GCN weight and zero the (unused) bias."""
        nn.init.xavier_uniform_(self.weight)
        nn.init.zeros_(self.bias)

    def gcn_conv(self,X,H):
        """One GCN step: project ``X`` with the shared weight and propagate it.

        ``H`` is normalised with self-loops added (``norm(H, add=True)``) and
        its transpose is multiplied with the projected features.
        """
        X = torch.mm(X, self.weight)
        H = self.norm(H, add=True)
        return torch.mm(H.t(),X)

    def normalization(self, H):
        """Apply :meth:`norm` to each of the ``num_channels`` matrices in ``H``.

        Returns a tensor with the normalised matrices stacked along dim 0.
        """
        return torch.stack(
            [self.norm(H[i]) for i in range(self.num_channels)], dim=0
        )

    def norm(self, H, add=False):
        """Normalise a square matrix so that each column sums to one.

        The diagonal of ``H`` is first cleared (``add=False``) or replaced by
        ones (anything else, i.e. self-loops are added). Every column is then
        divided by its sum; columns whose sum is zero are left as zeros.

        Args:
            H: Square 2-D tensor.
            add: ``False`` removes self-loops, ``True`` adds unit self-loops.

        Returns:
            A new tensor of the same shape; ``H`` itself is not modified.
        """
        H = H.t()
        # Build the identity once, on the same device as H. It stays float32
        # so dtype promotion matches the previous FloatTensor-based code,
        # but a GPU input no longer collides with a CPU-only identity.
        eye = torch.eye(H.shape[0], dtype=torch.float32, device=H.device)
        off_diag = H * (1 - eye)
        if add == False:  # noqa: E712 - keeps the original comparison semantics
            H = off_diag
        else:
            H = off_diag + eye
        deg = torch.sum(H, dim=1)
        deg_inv = deg.pow(-1)
        # Rows with zero degree would give 1/0 = inf; treat them as isolated.
        deg_inv[deg_inv == float('inf')] = 0
        # Row-wise scaling is equivalent to multiplying by diag(deg_inv) from
        # the left, without materialising a dense N x N diagonal matrix.
        H = deg_inv.unsqueeze(1) * H
        return H.t()

    def forward(self, A, X, target_x, target):
        """Run the network and compute the classification loss.

        Args:
            A: 3-D tensor of stacked adjacency matrices whose last axis
                indexes the ``num_edge`` edge types.
            X: 2-D node-feature matrix with ``w_in`` columns.
            target_x: Indices of the nodes to classify.
            target: Class labels for those nodes.

        Returns:
            ``(loss, y, Ws)``: the cross-entropy loss, the logits for
            ``target_x``, and one list of softmaxed convolution weights per
            GTLayer.
        """
        # Move the edge-type axis to the channel position: (1, edge, row, col).
        A = A.unsqueeze(0).permute(0,3,1,2)
        Ws = []
        H = None
        # go through multiple GTlayers
        for i, layer in enumerate(self.layers):
            if i == 0:
                H, W = layer(A)
            else:
                H = self.normalization(H)
                H, W = layer(A, H)
            Ws.append(W)

        # Apply the shared GCN step on every channel and concatenate the
        # per-channel node embeddings along the feature axis.
        X_ = torch.cat(
            [F.relu(self.gcn_conv(X, H[i])) for i in range(self.num_channels)],
            dim=1,
        )
        X_ = self.linear1(X_)
        X_ = F.relu(X_)
        y = self.linear2(X_[target_x])
        loss = self.loss(y, target)
        return loss, y, Ws

class GTLayer(nn.Module):
    """One Graph Transformer layer.

    Soft-selects candidate adjacency matrices with 1x1 convolutions (GTConv)
    and multiplies them batch-wise. The first layer builds both factors from
    the input stack; every later layer multiplies the previous layer's result
    by a single newly selected candidate.
    """

    def __init__(self, in_channels, out_channels, first=True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.first = first
        # conv1 exists in every layer; only the first layer needs a second one.
        self.conv1 = GTConv(in_channels, out_channels)
        if self.first == True:
            self.conv2 = GTConv(in_channels, out_channels)

    def forward(self, A, H_=None):
        """Return ``(H, W)``: the product matrices and the softmaxed conv weights.

        Args:
            A: Input adjacency stack handed to the convolution(s).
            H_: Output of the previous layer; only used when ``first`` is false.
        """
        if self.first == True:
            # First layer: both factors are fresh candidates.
            left = self.conv1(A)
            right = self.conv2(A)
            convs = (self.conv1, self.conv2)
        else:
            # Later layers: previous result times one fresh candidate.
            left = H_
            right = self.conv1(A)
            convs = (self.conv1,)
        product = torch.bmm(left, right)
        weights = [F.softmax(conv.weight, dim=1).detach() for conv in convs]
        return product, weights

class GTConv(nn.Module):
    """1x1 convolution used inside a GTLayer.

    Holds one weight per (output channel, input channel) pair. In the forward
    pass the weights are softmaxed over the input-channel axis and used to
    form a convex combination of the input matrices for each output channel.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.weight = nn.Parameter(torch.Tensor(out_channels,in_channels,1,1))
        self.bias = None
        # Frozen parameter; not read anywhere in this class.
        self.scale = nn.Parameter(torch.Tensor([0.1]), requires_grad=False)
        self.reset_parameters()

    def reset_parameters(self):
        """Set every weight to 0.1; initialise the bias uniformly if one exists."""
        nn.init.constant_(self.weight, 0.1)
        if self.bias is None:
            return
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
        limit = 1 / math.sqrt(fan_in)
        nn.init.uniform_(self.bias, -limit, limit)

    def forward(self, A):
        """Mix the input channels of ``A`` with softmax-normalised weights.

        A 1x1 convolution behaves like a fully connected layer across
        channels; here the weights pass through a softmax first, so each
        output channel is a weighted average of the input channels (dim 1).
        """
        mixing = F.softmax(self.weight, dim=1)
        return (A * mixing).sum(dim=1)

In [7]:
# Load the pickled dataset: node features, one sparse adjacency matrix per
# edge type, and the (node index, class label) pairs of each split.
# NOTE(security): pickle.load can execute arbitrary code contained in the
# file; only load pickles that come from a trusted source.
with open('/content/node_features.pkl','rb') as f:
    node_features = pickle.load(f)
with open('/content/edges.pkl','rb') as f:
    edges = pickle.load(f)
with open('/content/labels.pkl','rb') as f:
    labels = pickle.load(f)
num_nodes = edges[0].shape[0]

# Build the dense (num_nodes, num_nodes, num_edge_types + 1) input stack in a
# single torch.stack call. Growing it with torch.cat inside the loop would
# re-copy every previously added N x N slice on each iteration.
adjacency_slices = [
    torch.from_numpy(edge.todense()).type(torch.FloatTensor) for edge in edges
]
# The identity matrix is appended as one extra "edge type".
adjacency_slices.append(torch.eye(num_nodes).type(torch.FloatTensor))
A = torch.stack(adjacency_slices, dim=-1)

node_features = torch.from_numpy(node_features).type(torch.FloatTensor)

# labels[0], labels[1], labels[2] are the train / validation / test splits;
# column 0 holds node indices and column 1 the class labels.
train_split = np.array(labels[0])
valid_split = np.array(labels[1])
test_split = np.array(labels[2])
train_node = torch.from_numpy(train_split[:,0]).type(torch.LongTensor)
train_target = torch.from_numpy(train_split[:,1]).type(torch.LongTensor)
valid_node = torch.from_numpy(valid_split[:,0]).type(torch.LongTensor)
valid_target = torch.from_numpy(valid_split[:,1]).type(torch.LongTensor)
test_node = torch.from_numpy(test_split[:,0]).type(torch.LongTensor)
test_target = torch.from_numpy(test_split[:,1]).type(torch.LongTensor)

In [8]:
# Evaluation metrics for model predictions:
# accuracy, per-class TP/TN/FP/FN counts, precision, recall and F1-score.
def accuracy(pred, target):
    """Return the fraction of entries where ``pred`` equals ``target``.

    Args:
        pred: Tensor of predicted class indices.
        target: Tensor of ground-truth class indices.

    Returns:
        A Python float: matching entries divided by ``target.numel()``.
    """
    n_correct = torch.eq(pred, target).sum().item()
    return n_correct / target.numel()



def true_positive(pred, target, num_classes):
    """Count, per class, the entries predicted as that class that truly are it.

    Returns:
        A 1-D tensor with one count for each class in ``range(num_classes)``.
    """
    counts = [
        ((target == cls) & (pred == cls)).sum() for cls in range(num_classes)
    ]
    return torch.tensor(counts)



def true_negative(pred, target, num_classes):
    """Count, per class, the entries that neither are nor are predicted as it.

    Returns:
        A 1-D tensor with one count for each class in ``range(num_classes)``.
    """
    counts = []
    for cls in range(num_classes):
        # De Morgan: (pred != cls) & (target != cls)
        involved = (pred == cls) | (target == cls)
        counts.append((~involved).sum())
    return torch.tensor(counts)



def false_positive(pred, target, num_classes):
    """Count, per class, the entries predicted as that class that are not it.

    Returns:
        A 1-D tensor with one count for each class in ``range(num_classes)``.
    """
    counts = [
        ((target != cls) & (pred == cls)).sum() for cls in range(num_classes)
    ]
    return torch.tensor(counts)



def false_negative(pred, target, num_classes):
    """Count, per class, the entries of that class predicted as something else.

    Returns:
        A 1-D tensor with one count for each class in ``range(num_classes)``.
    """
    counts = [
        ((target == cls) & (pred != cls)).sum() for cls in range(num_classes)
    ]
    return torch.tensor(counts)



def precision(pred, target, num_classes):
    """Per-class precision, TP / (TP + FP).

    Classes with no predicted entries (0 / 0) get a precision of 0.

    Returns:
        A 1-D float tensor with one value per class.
    """
    hits = true_positive(pred, target, num_classes).float()
    false_alarms = false_positive(pred, target, num_classes).float()

    ratio = hits / (hits + false_alarms)
    # 0 / 0 yields NaN when a class was never predicted.
    return ratio.masked_fill(torch.isnan(ratio), 0)



def recall(pred, target, num_classes):
    """Per-class recall, TP / (TP + FN).

    Classes that never occur in ``target`` (0 / 0) get a recall of 0.

    Returns:
        A 1-D float tensor with one value per class.
    """
    hits = true_positive(pred, target, num_classes).float()
    misses = false_negative(pred, target, num_classes).float()

    ratio = hits / (hits + misses)
    # 0 / 0 yields NaN when a class has no ground-truth entries.
    return ratio.masked_fill(torch.isnan(ratio), 0)



def f1_score(pred, target, num_classes):
    """Per-class F1 score: the harmonic mean of precision and recall.

    Classes whose precision and recall are both 0 get an F1 of 0.

    Returns:
        A 1-D float tensor with one value per class.
    """
    p = precision(pred, target, num_classes)
    r = recall(pred, target, num_classes)

    harmonic = 2 * (p * r) / (p + r)
    # 0 / 0 yields NaN when precision and recall are both zero.
    return harmonic.masked_fill(torch.isnan(harmonic), 0)


In [9]:
# training settings
epochs = 40
node_dim = 2
num_channels = 2
lr = 0.005
weight_decay = 0.001
num_layers = 3
norm = "true"
adaptive_lr = 'true'
# Adaptive mode only: the GT layers start at `gt_layer_lr`, which is
# multiplied by `gt_layer_lr_decay` every epoch while it is above `lr`.
gt_layer_lr = 0.5
gt_layer_lr_decay = 0.9

num_classes = torch.max(train_target).item()+1
final_f1 = 0


def _macro_f1(logits, target):
    """Macro-averaged F1 of the arg-max predictions, as a 0-d NumPy array."""
    pred = torch.argmax(logits.detach(), dim=1)
    return torch.mean(f1_score(pred, target, num_classes=num_classes)).cpu().numpy()


# build up the model
for run in range(1):
    model = GTN(num_edge=A.shape[-1],
                        num_channels=num_channels,
                        w_in = node_features.shape[1],
                        w_out = node_dim,
                        num_class=num_classes,
                        num_layers=num_layers,
                        norm=norm)
    # The optimiser now uses the settings above instead of repeating literals.
    if adaptive_lr == 'false':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    else:
        # using different learning rate for different layers
        optimizer = torch.optim.Adam([{'params':model.weight},
                                    {'params':model.linear1.parameters()},
                                    {'params':model.linear2.parameters()},
                                    {"params":model.layers.parameters(), "lr":gt_layer_lr}
                                    ], lr=lr, weight_decay=weight_decay)
    # The loss is computed inside GTN.forward, so no separate criterion is
    # created here.
    # to record the best value of each score
    best_val_loss = 10000
    best_test_loss = 10000
    best_train_loss = 10000
    best_train_f1 = 0
    best_val_f1 = 0
    best_test_f1 = 0

    for i in range(epochs):
        # Decay any group still above the base rate (only the GT layers in
        # adaptive mode). This runs before the step, so the first epoch
        # already uses the decayed rate.
        for param_group in optimizer.param_groups:
            if param_group['lr'] > lr:
                param_group['lr'] = param_group['lr'] * gt_layer_lr_decay
        print('Epoch:  ',i+1)
        model.zero_grad()
        model.train() # setting model to train mode
        loss,y_train,Ws = model(A, node_features, train_node, train_target) #forward

        # calculate F1-score on train set
        train_f1 = _macro_f1(y_train, train_target)
        print('Train - Loss: {}, Macro_F1: {}'.format(loss.detach().cpu().numpy(), train_f1))

        loss.backward() # backward to calculate gradients
        optimizer.step() # update the weights
        model.eval() # setting model to test mode
        # Validation and test. NOTE: each call re-runs the full forward pass
        # over the whole graph, so every epoch costs three forward passes.
        with torch.no_grad():
            val_loss, y_valid,_ = model(A, node_features, valid_node, valid_target)
            val_f1 = _macro_f1(y_valid, valid_target)
            print('Valid - Loss: {}, Macro_F1: {}'.format(val_loss.detach().cpu().numpy(), val_f1))
            test_loss, y_test,W = model(A, node_features, test_node, test_target)
            test_f1 = _macro_f1(y_test, test_target)
            print('Test - Loss: {}, Macro_F1: {}\n'.format(test_loss.detach().cpu().numpy(), test_f1))
        # Model selection is driven by validation F1; the train and test
        # numbers recorded are those from the same epoch.
        if val_f1 > best_val_f1:
            best_val_loss = val_loss.detach().cpu().numpy()
            best_test_loss = test_loss.detach().cpu().numpy()
            best_train_loss = loss.detach().cpu().numpy()
            best_train_f1 = train_f1
            best_val_f1 = val_f1
            best_test_f1 = test_f1
    print('---------------Best Results--------------------')
    print('Train - Loss: {}, Macro_F1: {}'.format(best_train_loss, best_train_f1))
    print('Valid - Loss: {}, Macro_F1: {}'.format(best_val_loss, best_val_f1))
    print('Test - Loss: {}, Macro_F1: {}'.format(best_test_loss, best_test_f1))
    final_f1 += best_test_f1

Epoch:   1


KeyboardInterrupt: ignored

In [None]:
# Put the model back into training mode; the training loop calls model.eval()
# after each optimisation step, so that is the mode it is left in.
model.train()