In [9]:
from tqdm import tqdm
import itertools
from collections import defaultdict 
import numpy as np
from snpp.utils.matrix import load_sparse_csr, \
    split_train_test
from snpp.utils.signed_graph import matrix2graph


# --- run configuration ---
# Seed value kept together with the other tunable settings of this notebook.
random_seed = 123456

# Dataset name; it is substituted into the input path below.
dataset = 'slashdot'
# File that the next cell hands to load_sparse_csr.
raw_mat_path = 'data/{}.npz'.format(dataset)

In [3]:
# Load the stored sparse matrix for the configured dataset.
m = load_sparse_csr(raw_mat_path)

# `random_seed` was defined in the configuration cell but never used, so the
# split below was different on every run. Seed NumPy's global RNG right before
# the split so that re-running this cell reproduces the same partition.
# NOTE(review): this assumes split_train_test draws from np.random -- confirm
# against its implementation (or pass the seed explicitly if it accepts one).
np.random.seed(random_seed)

print('split_train_test')
# weights=[0.9, 0.1] -- presumably the train/test proportions; verify against
# split_train_test's documentation.
train_m, test_m = split_train_test(
    m,
    weights=[0.9, 0.1])

# Non-zero test entries as order-independent (smaller, larger) index pairs, so
# that (i, j) and (j, i) map to the same key in later membership tests.
test_entries = {tuple(sorted(pair))
                for pair in zip(*test_m.nonzero())}

# The graph is built from the full matrix `m` (not `train_m`): the later cells
# enumerate triangles of `g` and look up which of their edges are test entries.
g = matrix2graph(m, None)

split_train_test


1291it [00:00, 6506.56it/s]

building MultiGraph


516575it [01:32, 5584.05it/s]


In [4]:
# getting triangles
# Collect every triangle of `g` as an ascending node triple (v, w, u).
# Node ids must be mutually comparable; the previous version already relied on
# that by calling sorted() on each triple.
nodes_nbrs = g.adj.items()

triangles = set()
for v, v_nbrs in tqdm(nodes_nbrs):
    # Keep only neighbours that sort after v. This drops self-loops (w == v)
    # and makes v the smallest corner of every triangle reported from here,
    # instead of rediscovering the same triangle from each of its corners.
    higher_nbrs = {w for w in v_nbrs if w > v}
    for w in higher_nbrs:
        # u has to be adjacent to both v and w. Requiring u > w fixes the
        # order v < w < u, so each triangle is produced exactly once and the
        # stored tuple is identical to tuple(sorted([u, v, w])).
        for u in higher_nbrs.intersection(g[w]):
            if u > w:
                triangles.add((v, w, u))

print("{} triangles".format(len(triangles)))

100%|██████████| 77350/77350 [00:27<00:00, 2863.39it/s] 

548054 triangles





In [6]:
# Histogram over triangles: slot k holds the number of triangles in which
# exactly k of the three edges are members of `test_entries` (k = 0..3).
triangle_order_cnt = np.zeros(4)
for tri in tqdm(triangles):
    n_test_edges = 0
    # The three node pairs of the triangle are its edges.
    for pair in itertools.combinations(tri, 2):
        if tuple(sorted(pair)) in test_entries:
            n_test_edges += 1
    triangle_order_cnt[n_test_edges] += 1

# Convert the raw counts into percentages of all triangles.
total_triangles = np.sum(triangle_order_cnt)
print('the percetange of 0, 1, 2, 3-order triangles:')
print(triangle_order_cnt / total_triangles * 100)

100%|██████████| 548054/548054 [00:05<00:00, 100192.73it/s]


the percetange of 0, 1, 2, 3-order triangles:
[ 65.53660771  29.58832524   4.61852299   0.25654406]


In [19]:
# Per test edge: how many triangles of each "order" the edge takes part in,
# where the order of a triangle is the number of its edges that are in
# `test_entries`. Slot k of an edge's vector counts the order-k triangles
# containing that edge.
#
# Caveats that follow directly from the loop below:
#   * slot 0 is always zero, because an edge is only recorded for triangles in
#     which it is itself a test edge (so the order is at least 1);
#   * test edges that appear in no triangle never get an entry, so the
#     statistics cover only test edges contained in at least one triangle.
edge2triangle_order = defaultdict(lambda: np.zeros(4))
for t in tqdm(triangles):
    edges = [e for e in itertools.combinations(t, 2)
             if tuple(sorted(e)) in test_entries]
    for e in edges:
        edge2triangle_order[e][len(edges)] += 1

# One row per recorded edge, one column per triangle order. Stored under its
# own name rather than `m`, which an earlier cell binds to the loaded sparse
# matrix; reusing it here silently replaced that matrix in the kernel.
edge_order_counts = np.array(list(edge2triangle_order.values()))
print(edge_order_counts.shape)

print('mean of triangle order count on edges: {}'.format(np.mean(edge_order_counts, axis=0)))
print('median of triangle order count on edges: {}'.format(np.median(edge_order_counts, axis=0)))
print('max of triangle order count on edges: {}'.format(np.max(edge_order_counts, axis=0)))
print('min of triangle order count on edges: {}'.format(np.min(edge_order_counts, axis=0)))
                                                                 


100%|██████████| 548054/548054 [00:04<00:00, 115914.85it/s]

(26788, 4)
mean of triangle order count on edges: [ 0.          6.05345677  1.8898014   0.15745856]
median of triangle order count on edges: [ 0.  2.  1.  0.]
max of triangle order count on edges: [   0.  434.   91.    8.]
min of triangle order count on edges: [ 0.  0.  0.  0.]



