In [2]:
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data.dataset import random_split

import dgl
from dgl import function as fn
from dgl import DGLGraph
from dgl.data import citation_graph as citegrh

import networkx as nx
import matplotlib.pyplot as plt
import random as rand
import numpy as np

In [3]:


#set gpu if available
if th.cuda.is_available():
    print("GPU is available")
    #device = th.device("cuda")
    device = th.device("cuda")
else:
    print("GPU not available, CPU used")
    device = th.device("cpu")

GPU is available


In [3]:
#operation for neigbors
class NodeApplyModule(nn.Module):
    def __init__(self, in_feats, out_feats, activation):
        super(NodeApplyModule, self).__init__()
        self.linear = nn.Linear(in_feats, out_feats)
        self.activation = activation

    def forward(self, node):
        h = self.linear(node.data['h'])
        if self.activation is not None:
            h = self.activation(h)
        return {'h' : h}
    
#gcn layer in network
class GCN(nn.Module):
    def __init__(self, in_feats, out_feats, activation):
        super(GCN, self).__init__()
        self.apply_mod = NodeApplyModule(in_feats, out_feats, activation)

    def forward(self, g, feature):
        
        g.ndata['h'] = feature
        g.pull(g.nodes())
        g.apply_nodes(self.apply_mod)
        
        return g.ndata.pop('h')
    
#network
class GCN_LL(nn.Module):
    def __init__(self, in_feats, out_feats):
        super(GCN_LL, self).__init__()
        self.gcn1 = GCN(in_feats, 100, F.relu)
        self.gcn2 = GCN(100, 30, F.relu)
        self.gcn3 = GCN(30, out_feats, th.tanh)

    def forward(self, g, features):
        x = self.gcn1(g, features)
        x = self.gcn2(g, x)
        
        return self.gcn3(g, x)


In [9]:
#loss function

"""Here is an implementation for your similarity_matrix using only matrix operations. It can run on the GPU and is going to be significantly faster than your previous implementation.
https://discuss.pytorch.org/t/build-your-own-loss-function-in-pytorch/235/6
"""

"""
Issue and to-do:
    Similarity matrix results in nan
"""

def similarity_matrix(x):
    x_norm = (x**2).sum(1).view(-1, 1)
    y = x
    y_norm = x_norm.view(1, -1)

    dist = x_norm + y_norm - 2.0 * th.mm(x, th.transpose(y, 0, 1))
    return dist

def same_label(y):
    s = y.size(0)
    y_expand = y.unsqueeze(0).expand(s, s)
    Y = y_expand.eq(y_expand.t())
    return Y

def my_loss(output, labels):
    """
    if nodes with the same label: x^2
    if nodes with different label: 1/(10*x^2)
    """
    sim = similarity_matrix(output)
    temp = same_label(labels)
    same_l = (temp * sim)
    same_l_inv = ((temp*(-1) + 1) * sim)
    
    loss = ((th.sum(same_l)**2) + (1/(10*th.sum(same_l_inv)**2)))
    #loss = ((th.mean(same_l)**2) + (th.mean(same_l_inv)**2))
    
    return loss

def has_2_rand_label(nodes):
    ava_labels = list(possible_lab)
    rand.shuffle(ava_labels)
    
    out = (nodes.data['t_label'] == ava_labels[0] or nodes.data['t_label'] == ava_labels[2]).squeeze(1)
    possible_lab.pop(0)
    possible_lab.pop(0)
    
    return out

def has_1_rand_label(nodes):
    ava_labels = list(possible_lab)
    rand.shuffle(ava_labels)
    
    out = (nodes.data['t_label'] == ava_labels[0]).squeeze(1)
    possible_lab.pop(0)
    
    return out




In [10]:
#load dataset
data = citegrh.load_cora()
ds_features = th.FloatTensor(data.features).to(device) #convert to pytorch data type #######
ds_labels = th.LongTensor(data.labels).to(device)
ds_g = data.graph

# add self loop for the sum of festures
ds_g.remove_edges_from(nx.selfloop_edges(ds_g))
ds_g = DGLGraph(ds_g)
ds_g.add_edges(ds_g.nodes(), ds_g.nodes())
ds_g.ndata['features'] = ds_features
ds_g.ndata['t_label'] = ds_labels #used to filter and train the first two labels, not needed for prediction

# to coordinate sending of features over the graph network
m_func = fn.copy_src(src='h', out='m')
m_reduce_func = fn.sum(msg='m', out='h')

In [13]:
########### Create Model ############

#constant parameters
DIST_VEC_SIZE = 10
NUMBER_OF_LABELS = 7

model = GCN_LL(ds_features.size()[1], DIST_VEC_SIZE).to(device)
opt = th.optim.Adam(model.parameters(), lr=1e-3)# only run once

In [None]:
#training

train_g = ds_g.subgraph(ds_g.nodes()[:int(len(ds_g) * .80)])#80 percent of all the nodes
train_g.copy_from_parent()

possible_lab = list(range(NUMBER_OF_LABELS))
EPOCH = 50

model.train()

#############train with 2 labels of 7 labels (uses true label from the dataset), assign vector to each node,
            #knn (1 labeled node for each label), test
selected_nodes = train_g.filter_nodes(has_2_rand_label) #add two unseen labels

#train
for epoch in range(EPOCH):
    rand.shuffle(selected_nodes)
    epoch_nodes = selected_node[:int(len(selected_node)*.20)] #selected 20% of random nodes to train with at each epoch
    
    error = []
    for count in range(len(epoch_nodes)):
        sub_graph = train_g.subgraph(epoch_nodes[:count])
        sub_graph.copy_from_parent()
        sub_graph.register_message_func(m_func)
        sub_graph.register_reduce_func(m_reduce_func)
        
        feats = sub_graph.ndata['features']
        labs = sub_graph.ndata['t_labels'] #true label
        
        out = model(sub_graph, feats)
        loss = my_loss(out, labs)
        error.append(loss.item())
        
        opt.zero_grad()
        loss.backward()
        opt.step()
#assign vector
#knn
#test
    

In [7]:
  
#training 
#######################add label, assign vector to each node, knn, test and train (with predicted labels by knn)

while len(possible_lab) > 0: #do it for every label ()
    selected_nodes.extend(train_g.filter_nodes(has_1_rand_label)) #add unseen new label (change 1 to 2 to add to labels at once)
    rand.shuffle(selected_nodes)
    
    #assign vector
    #knn
    #test
    #train
    for epoch in range(EPOCH):
        rand.shuffle(selected_nodes)
        epoch_nodes = selected_node[:int(len(selected_node)*.20)] #select 20% of random nodes to train with at each epoch

        error = []
        for count in range(len(epoch_nodes)):
            sub_graph = train_g.subgraph(epoch_nodes[:count])
            sub_graph.copy_from_parent()
            sub_graph.register_message_func(m_func)
            sub_graph.register_reduce_func(m_reduce_func)

            feats = sub_graph.ndata['features']
            
            labs = sub_graph.ndata['t_labels'] #change to predicted label by knn (if label not given) *********

            out = model(sub_graph, feats)
            loss = my_loss(out, labs)
            error.append(loss.item())

            opt.zero_grad()
            loss.backward()
            opt.step()

Epoch 0: 
	avg error = 2.849159374212042e-06 
	last error = 3.274331472624681e-09 
	avg accuracy = 0.0 
	last accuracy = 0.0 
Epoch 1: 
	avg error = 8.396367201980384e-09 
	last error = 1.2516924163818999e-09 
	avg accuracy = 0.0 
	last accuracy = 0.0 
Epoch 2: 
	avg error = 3.1999168450808323e-09 
	last error = 9.623448704587645e-10 
	avg accuracy = 0.0 
	last accuracy = 0.0 
Epoch 3: 
	avg error = 1.4447087481560294e-09 
	last error = 3.491356093121567e-10 
	avg accuracy = 0.0 
	last accuracy = 0.0 
Epoch 4: 
	avg error = 7.560877813046131e-10 
	last error = 1.263662063877291e-09 
	avg accuracy = 0.0 
	last accuracy = 0.0 
Epoch 5: 
	avg error = 4.4562802434009954e-10 
	last error = 7.072740171309988e-10 
	avg accuracy = 0.0 
	last accuracy = 0.0 
Epoch 6: 
	avg error = 2.901853388827861e-10 
	last error = 5.298499139438917e-11 
	avg accuracy = 0.0 
	last accuracy = 0.0 
Epoch 7: 
	avg error = 2.0170063083032827e-10 
	last error = 2.8992380718406707e-10 
	avg accuracy = 0.0 
	last ac

In [10]:
ds_features[:10]

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')