In [2]:
# read log.txt 

import pandas as pd
import matplotlib.pyplot as plt


known_table = {}

known_table["GCN+NRE"] = {
    "Cora": {"f1": "76.7 +/- 0.9", "acc": "92.7 +/- 0.2"},
    "CiteSeer": {"f1": "66.2 +/- 1.1", "acc": "93.2 +/- 0.2"}
}

known_table["GCN+URE"] = {
     "Cora": {"f1": "50.9 +/- 0.8", "acc": "88.0 +/- 0.1"},
    "CiteSeer": {"f1": "42.6 +/- 1.7", "acc": "90.9 +/- 0.2"}
}

# dict to dataframe
pd.DataFrame.from_dict({(i,j): known_table[i][j] 
                           for i in known_table.keys() 
                           for j in known_table[i].keys()},
                       orient='index')



Unnamed: 0,Unnamed: 1,f1,acc
GCN+NRE,Cora,76.7 +/- 0.9,92.7 +/- 0.2
GCN+NRE,CiteSeer,66.2 +/- 1.1,93.2 +/- 0.2
GCN+URE,Cora,50.9 +/- 0.8,88.0 +/- 0.1
GCN+URE,CiteSeer,42.6 +/- 1.7,90.9 +/- 0.2


In [3]:
import os
import pandas as pd
file_list = os.listdir('logs')
rows = []

my_table = {}


model = "my_GCN"
for file_name in file_list:
    dataset = file_name.split('_')[0]
    method = "my" + file_name.split('_')[1][:-4]
    file_name = 'logs/' + file_name
    df = pd.read_csv(file_name, sep=' ', header=None)
    
#     method = file_name[:-4]

    result_f1 = "{} +/- {}".format(round(df[0].mean() * 100, 1), round(df[0].std() * 100, 1))
    result_acc = "{} +/- {}".format(round(df[1].mean() * 100, 1), round(df[1].std() * 100, 1))

    if method not in my_table:
        my_table[method] = {
            dataset: {"f1": result_f1, "acc": result_acc}
        }
    else:
        my_table[method][dataset] = {"f1": result_f1, "acc": result_acc}

# add my_table to known_table
known_table.update(my_table)

# sort by key
my_table = {k: my_table[k] for k in sorted(my_table.keys())}

pd.DataFrame.from_dict({(i, j): my_table[i][j] 
                           for i in my_table.keys() 
                           for j in my_table[i].keys()},
                       orient='index')

Unnamed: 0,Unnamed: 1,f1,acc
myGCN+NRE,Cora,84.7 +/- 0.2,91.8 +/- 0.1
myGCN+NRE,CiteSeer,81.0 +/- 0.3,92.7 +/- 0.1
myGCN+URE,Cora,75.1 +/- 0.4,87.9 +/- 0.2
myGCN+URE,CiteSeer,69.0 +/- 0.8,89.8 +/- 0.2


In [3]:
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
import torch.nn as nn
import torch


class GCN(nn.Module):
    def __init__(self, num_node_features, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(num_node_features, 16)
        self.conv2 = GCNConv(16, num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        x = torch.sigmoid(x)
        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module

 
Label = {
    "-1": 0,
    "1": 1
}


class GRABLoss(nn.Module):
    def __init__(self, loss=(lambda x: torch.sigmoid(x))) -> None:
        super().__init__()
        self.loss_func = loss
        self.positive = Label["1"]
        self.unlabelled = Label["-1"]

    def forward(self, inp, target, b):
        positive, unlabelled = target == self.positive, target == self.unlabelled
        positive, unlabelled = positive.type(torch.float), unlabelled.type(torch.float)
        n_pos, n_unlb = torch.sum(positive), torch.sum(unlabelled)

        # inp [n, 1] to [n, 2] where the second column is 1 - inp
        inp = torch.cat((inp, 1 - inp), dim=1)
        target = torch.cat((target, 1 - target), dim=1)
        # element wise dot product
        prod = torch.sum(inp * target, dim=-1)
        y_pos = self.loss_func(prod) * positive
        
        prod = torch.sum(inp * b, dim=-1)
        y_unlabelled = self.loss_func(prod) * unlabelled
        positive_risk = torch.sum(y_pos) / n_pos
        unlb_risk = torch.sum(y_unlabelled) / n_unlb
        
        return positive_risk + unlb_risk

In [16]:
from copy import deepcopy

def has_converged(M_prev, M):
    # if M_prev.min() == 0: 
    #     M = M_prev
    #     return True
    diff = torch.max(torch.abs(M_prev - M))
    return diff < 1e-4


def LBP(prior, data):
    # prior : prior probability of positive class
    # data : graph data
    # return : belief for each node
    #-------------------------------------#
    # data : (2708, 1433)
    # edge_index : (2, 10556)
    #-------------------------------------#
    
    eps = 0.9
    num_nodes = data.num_nodes
    num_edges = data.edge_index.shape[1]
    b =  torch.zeros(num_nodes, 2)
    phi = torch.cat((torch.ones(num_nodes, 1), torch.zeros(num_nodes, 1)), dim=1).float()
    phi[data.PU_mask][Label["1"]] = prior
    phi[data.PU_mask][Label["-1"]] = 1 - prior
    phi[~data.PU_mask][Label["1"]] = 1
    phi[~data.PU_mask][Label["-1"]] = 0
    M = torch.full_like(data.edge_index.T, 0.5, dtype=torch.double)
    M_prev = M.clone()
    prods = torch.ones(num_nodes, 2).double()
    edge_to_index = {}
    for index in range(num_edges):
        i, j = data.edge_index[0][index], data.edge_index[1][index]
        i, j = int(i), int(j)
        prods[j][Label["1"]] *= M[index][Label["1"]]
        prods[j][Label["-1"]] *= M[index][Label["-1"]]
        edge_to_index[(i, j)] = index
    #-------------------------------------#
    epoch = 0
    MIN = 1e-20
    while True:
        epoch += 1
        for index in range(num_edges):
            i, j = data.edge_index[0][index], data.edge_index[1][index] 
            i, j = int(i), int(j)
            for v in [Label["1"], Label["-1"]]:
                M[index][v] = 0
                for u in [Label["1"], Label["-1"]]:
                    psi = eps if u == v else 1 - eps
                    assert prods[i][u] == prods[i][u]
                    M[index][v] += phi[i][u] * psi * prods[i][u] / (M_prev[edge_to_index[(j, i)]][u] + MIN)
                assert M[index][v] == M[index][v]

        if epoch > 5 or has_converged(M_prev, M):
            print("Converged in {} epochs".format(epoch))
            for j in range(num_nodes):
                u = Label["1"]
                T = 0
                for v in [Label["1"], Label["-1"]]:
                    T += phi[j][v] * prods[j][v]
                print(T)
                b[j][u] = phi[j][u] * prods[j][u] / (T + MIN)
                b[j][Label["-1"]] = 1 - b[j][Label["1"]]

            return b
        
        prods = torch.ones(num_nodes, 2).double().to(device)
        for index in range(num_edges):
            i, j = data.edge_index[0][index], data.edge_index[1][index]
            i, j = int(i), int(j)
            for u in [Label["1"], Label["-1"]]:
                if M[index][u] != M[index][u]:
                    M[index][u] = MIN
                
                prods[j][u] *= M[index][u]
                M_prev[index][u] = M[index][u]
        
        if epoch % 10 == 0: 
            print("epoch : ", epoch)
def GRAB(data):
    l_new = float("inf")
    prior_new = 0
    epoch = 0
    while True:
        l, prior = l_new, prior_new
        if epoch % 10 == 0:
            print("GRAB epoch : ", epoch)
        B = LBP(prior, data)
        print(B)
        model = GCN(data.num_features, 1)
        optimizer = torch.optim.Adam(model.parameters())
        loss_func = GRABLoss()
        for epoch in range(1000):
            model.train()
            optimizer.zero_grad()
            out = model(data)
            loss = loss_func(out, data.y_train.view(-1, 1), B)
            loss.backward()
            optimizer.step()
            if epoch % 100 == 0:
                print('Epoch: {:03d}, Loss: {:.5f}'.format(epoch, loss.item()))
        
        model.eval()
        out = model(data)
        l_new = loss_func(out, data.y.view(-1, 1), B)
        pred = out > 0.5
        prior_new = torch.sum(pred[data.PU_mask]) / torch.sum(data.PU_mask)
        if l_new > l:
            break
        l = l_new
        epoch += 1

    return prior_new, l_new, model

In [4]:
from torch_geometric.datasets import Planetoid
import torch
from data_loading import parse_planetoid_data

class Args:
    dataset = 'Cora'
    known_prior = False

args = Args()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cpu')


dataset = Planetoid(root='../data', name=args.dataset)
data, prior = parse_planetoid_data(dataset, known_prior=args.known_prior, device=device)

In [17]:
prior = torch.tensor(0.0)
LBP(prior, data)

Converged in 6 epochs
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0.9000, dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0.9000, dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(4.8566e-25, dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(7.3605e-54, dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(0., dtype=torch.flo

tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]])

prior = torch.tensor(0.4)
LBP(prior, data)

In [45]:
a

tensor(0.)

In [46]:
b

tensor(3144.9319, grad_fn=<AddBackward0>)

In [47]:
c

GCN(
  (conv1): GCNConv(1433, 16)
  (conv2): GCNConv(16, 1)
)