In [1]:
# Step up one directory so the relative 'StrGNN_Data/...' path used by later
# cells resolves (presumably this lands in the project root — confirm).
%cd ..

/home/ead/iking5/code/StreamLP


In [52]:
import torch 
import matplotlib.pyplot as plt 
from torch_geometric.nn import MessagePassing 
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.manifold import TSNE
from sklearn.metrics import accuracy_score, precision_score, recall_score
from math import ceil

In [65]:
def test_nonparam_lp(d, aggr='mean', dataset='digg'):
    g = torch.load(f'StrGNN_Data/{dataset}.pt')

    rnd_x = torch.eye(d).repeat(
        ceil(g.num_nodes / d), 1
    )[:g.num_nodes]
    rnd_x = rnd_x[torch.randperm(rnd_x.size(0))]
    
    rnd_x = torch.rand(g.num_nodes,d)

    ei = g.edge_index[:, g.edge_index.size(1)//2:]
    te_ei = g.edge_index[:, :g.edge_index.size(1)//2]

    mp = MessagePassing(aggr=aggr)
    one_hop = mp.propagate(ei, size=None, x=rnd_x)
    two_hops = mp.propagate(ei, size=None, x=one_hop)
    three_hops = mp.propagate(ei, size=None, x=two_hops)

    x = torch.cat([one_hop,two_hops,three_hops], dim=1)
    edges = torch.cat([x[ei[0]], x[ei[1]]], dim=1)
    fake = torch.cat([
        x[torch.randint(0, g.num_nodes, (edges.size(0),))], 
        x[torch.randint(0, g.num_nodes, (edges.size(0),))]
    ], dim=1)

    y = torch.ones(edges.size(0)*2)
    y[:y.size(0)//2] = 0
    rf = RandomForestClassifier(n_jobs=16)
    rf.fit(torch.cat([edges, fake], dim=0), y)

    te_edges = torch.cat([x[te_ei[0]], x[te_ei[1]]], dim=1)
    te_fake = torch.cat([
        x[torch.randint(0, g.num_nodes, (te_edges.size(0),))], 
        x[torch.randint(0, g.num_nodes, (te_edges.size(0),))]
    ], dim=1)

    y_hat = rf.predict(torch.cat([te_edges, te_fake], dim=0))
    labels = torch.ones(te_edges.size(0)*2)
    labels[:labels.size(0)//2] = 0 

    print(
        accuracy_score(labels, y_hat),
        precision_score(labels, y_hat),
        recall_score(labels, y_hat),
        sep='\t'
    )


# Sweep every aggregation scheme across feature widths 2, 4, ..., 128 and
# print one tab-separated table per scheme (one result row per width).
for a in ('mean', 'sum', 'max'):
    # Scheme name on its own line, followed by the column header.
    print(a, "d\tAccuracy\t\tPrecision\t\tRecall", sep='\n')
    for d in (2 ** exp for exp in range(1, 8)):
        # Row label first; the metrics are printed on the same line by the call.
        print(f'{d}\t', end='')
        test_nonparam_lp(d, aggr=a)

    # Blank line between tables.
    print()

mean
d	Accuracy		Precision		Recall
2	0.6289913952479858	0.5951414959344119	0.8068838016113938
4	0.6315248898728688	0.5973839420005745	0.8068153287836943
8	0.6314450049072193	0.5975473423896474	0.8051948051948052
16	0.6315134777349188	0.5977670693633772	0.8040992399516126
32	0.6304635610435259	0.5972704376829351	0.8010864355328328
64	0.6324949215986123	0.5982299986462705	0.8069066258872937
128	0.6290142195238856	0.5956899324541653	0.803140620363819

sum
d	Accuracy		Precision		Recall
2	0.62238376737498	0.5878411585465745	0.8190034921142126
4	0.6239129938602698	0.5893015758134026	0.8177025083879214
8	0.6231940291694246	0.5887119307069014	0.8175427384566225
16	0.623273914135074	0.589107767438791	0.8149864195558396
32	0.623582041859722	0.5898388889810682	0.8113801839636637
64	0.6237874603428206	0.5896342571933826	0.8143016912788442
128	0.6245406614475156	0.5903229544288292	0.8139593271403465

max
d	Accuracy		Precision		Recall
2	0.6183324584027572	0.5852223226760911	0.8125898705863557
4	0.62

In [51]:
# Peek at the first 25 edges (columns of edge_index).
# `g` only exists as a local inside test_nonparam_lp, so load the graph here;
# otherwise this cell raises NameError on a fresh Restart & Run All.
# NOTE(review): assumes the graph being inspected is the default 'digg'
# dataset — confirm. torch.load unpickles, so only use trusted files.
g = torch.load('StrGNN_Data/digg.pt')
g.edge_index[:, :25]

tensor([[  0,  50,  90, 123,  33, 151, 116, 128, 175, 175, 151, 189, 151, 230,
         244, 246, 268, 324, 226,  50, 360, 391, 388, 196, 309],
        [  1,  51,  91, 124,  34, 152, 117, 163, 176, 151, 233, 190, 267, 231,
         245, 245, 267, 325, 151, 360, 360, 392, 389, 360, 310]])