In [1]:
# max_balance that uses only strong balance (equivalent to making cluster labels universally identical)
# iterative approach that uses weak balance
# predict 100, 200, ..., 2000 edges

# result:
# plot of two lines
# x-axis: edge count (budget)
# y-axis: accuracy


In [2]:

import _pickle as pkl
import networkx as nx
import numpy as np
import random

from tqdm import tqdm
from snpp.cores.lowrank import alq_spark, predict_signs
from snpp.utils.matrix import split_train_test, load_sparse_csr
from snpp.utils.signed_graph import g2m
from snpp.utils.data import load_train_test_graphs
from snpp.utils.edge_filter import filter_by_min_triangle_count
from snpp.utils.evaluation import accuracy

from snpp.utils.spark import sc

# Experiment configuration shared by the cells below.
dataset = 'slashdot'  # dataset name handed to load_train_test_graphs
lambda_ = 0.2  # forwarded as `lambda_` to partition_graph (presumably a regularization weight -- TODO confirm)
k = 10  # forwarded as `k` to iterative_approach (presumably the number of clusters -- TODO confirm)
max_iter = 100  # forwarded as `iterations` to partition_graph
random_seed = 123456  # seeds `random` and NumPy here; also forwarded as `seed` to partition_graph
min_tri_count = 30  # threshold handed to filter_by_min_triangle_count

recache_input = False  # second argument of load_train_test_graphs (presumably forces a cache rebuild -- TODO confirm)

# Seed both global RNGs before any data is loaded or any model is run.
random.seed(random_seed)
np.random.seed(random_seed)

In [3]:
# Load the train and test graphs for `dataset`; `recache_input` is forwarded unchanged.
train_g, test_g = load_train_test_graphs(dataset, recache_input)
# Undirected version of the training graph; this is the graph used by the
# edge filter and by both prediction runs below.
train_g_ud = train_g.to_undirected()

loading train and test graphs...


In [4]:
# Target edges to predict: the test-graph edges kept by
# filter_by_min_triangle_count with threshold `min_tri_count`
# (presumably edges taking part in at least that many triangles of the
# undirected training graph -- TODO confirm). Materialized as a set.
confident_edges = set(filter_by_min_triangle_count(train_g_ud, test_g.edges(), min_tri_count))

In [5]:
from snpp.cores.joint_part_pred import iterative_approach
from snpp.cores.max_balance import faster_greedy
from snpp.cores.lowrank import partition_graph
from snpp.cores.budget_allocation import constant_budget
from snpp.cores.triangle import build_edge2edges

# Keyword arguments shared by every iterative_approach call in this notebook.
common_params = {
    'g': train_g_ud,
    'T': confident_edges,
    'k': k,
    'budget_allocation_f': constant_budget,
    'solve_maxbalance_f': faster_greedy,
    # edge2edges is built once, on a copy of the undirected training graph,
    # and reused by every run that unpacks this dict.
    'solve_maxbalance_kwargs': dict(
        edge2edges=build_edge2edges(train_g_ud.copy(), confident_edges)),
    # (u, v, sign) triples for the target edges, with the sign read from the test graph.
    'truth': {(u, v, test_g[u][v]['sign']) for u, v in confident_edges},
    'perform_last_partition': False,
}

 21%|██        | 373/1819 [00:00<00:00, 1878.81it/s]

build edge2edges


100%|██████████| 1819/1819 [00:01<00:00, 1760.19it/s]


In [None]:
# strong balance approach
def partition_into_one(g, *args, **kwargs):
    """Return a label vector that gives every node of `g` the same label.

    Any extra positional or keyword arguments are accepted and ignored, so
    the function can be passed wherever a graph-partitioning callable with a
    richer signature is expected.

    Returns a 1-D NumPy array of ones whose length is `g.number_of_nodes()`.
    """
    node_count = g.number_of_nodes()
    return np.ones(shape=node_count)

# Strong-balance run: partition_into_one gives every node the same cluster
# label, and the budget constant is set to the number of target edges.
part, strong_preds, _ = iterative_approach(
    graph_partition_f=partition_into_one,
    budget_allocation_kwargs=dict(const=len(confident_edges)),
    **common_params)

# Report the accuracy of the strong-balance predictions against the test graph.
print(" => accuracy {} (strong balance)".format(accuracy(test_g, strong_preds)))

iteration=1, #remaining targets=1819
graph partitioning...
solving max_balance
building triangle_count_by_edge
edge2edges (size 1668) is given
✓ assigning 1 to (74, 87) produces 187 more balanced triangles (1, 188): (('s+1', 188), ('s-1', 1))
✓ assigning 1 to (75, 74) produces 185 more balanced triangles (15, 200): (('s+1', 200), ('s-1', 15))
✓ assigning 1 to (913, 867) produces 183 more balanced triangles (0, 183): (('s+1', 183),)
✓ assigning 1 to (648, 936) produces 183 more balanced triangles (2, 185): (('s+1', 185), ('s-1', 2))
✓ assigning 1 to (74, 82) produces 176 more balanced triangles (6, 182): (('s+1', 182), ('s-1', 6))
✓ assigning 1 to (87, 936) produces 174 more balanced triangles (0, 174): (('s+1', 174),)
✓ assigning 1 to (867, 74) produces 172 more balanced triangles (2, 174): (('s+1', 174), ('s-1', 2))
✓ assigning 1 to (83, 936) produces 167 more balanced triangles (1, 168): (('s+1', 168), ('s-1', 1))
✓ assigning 1 to (43, 34) produces 164 more balanced triangles (15, 17

In [None]:
# Weak-balance run: the graph is partitioned with partition_graph (using the
# Spark context `sc` and the configured lambda_/max_iter/random_seed), and the
# budget constant is fixed at 200.
part, weak_preds, _ = iterative_approach(
    graph_partition_f=partition_graph,
    graph_partition_kwargs=dict(sc=sc,
                                lambda_=lambda_,
                                iterations=max_iter,
                                seed=random_seed),
    budget_allocation_kwargs=dict(const=200),
    **common_params)
# Report the accuracy of the weak-balance predictions against the test graph.
print(" => accuracy {} (weak balance)".format(accuracy(test_g, weak_preds)))

# Accuracy on 2645 predictions is 0.9629489603024575

iteration=1, #remaining targets=1819
graph partitioning...
to_scipy_sparse_matrix


100%|██████████| 77357/77357 [00:01<00:00, 58012.29it/s]
