In [20]:
from tqdm import tqdm
import itertools
from collections import defaultdict 
import numpy as np
from snpp.utils.matrix import load_sparse_csr, \
    split_train_test
from snpp.utils.signed_graph import matrix2graph


# Name of the dataset to analyse; it is substituted into the matrix path below.
dataset = 'epinions'
# Path of the raw matrix file for the chosen dataset (read via load_sparse_csr).
raw_mat_path = 'data/{}.npz'.format(dataset)
# NOTE(review): this seed is not referenced by any cell visible here --
# presumably it was meant to make the train/test split reproducible; confirm
# and wire it in, or remove it.
random_seed = 123456

In [21]:
# Read the raw matrix for the configured dataset.
m = load_sparse_csr(raw_mat_path)

# Split the matrix into a train part and a test part with weights 0.9 / 0.1.
# NOTE(review): `random_seed` is not passed to the split -- confirm whether
# split_train_test is deterministic on its own.
print('split_train_test')
train_m, test_m = split_train_test(m, weights=[0.9, 0.1])

# Non-zero positions of the test matrix, stored as (smaller, larger) index
# pairs so that (i, j) and (j, i) collapse to the same key.
test_entries = {
    (i, j) if i <= j else (j, i)
    for i, j in zip(*test_m.nonzero())
}

# The graph is built from the full matrix `m`, not only from the train part.
g = matrix2graph(m, None)

split_train_test


690it [00:00, 6891.17it/s]

building MultiGraph


841372it [02:43, 5141.68it/s]


In [22]:
# Enumerate the triangles of `g`: node triples (u, v, w) where w is a
# neighbour of v and u is a neighbour of both v and w.
nodes_nbrs = g.adj.items()

# Every triangle is stored as a sorted node triple, so reaching the same
# triangle from a different start node does not create a duplicate entry.
triangles = set()
for v, v_nbrs in tqdm(nodes_nbrs):
    # Neighbours of v without v itself, so self-loops never form a "triangle".
    vs = set(v_nbrs) - {v}
    for w in vs:
        # Same self-loop exclusion for the second corner.
        ws = set(g[w]) - {w}
        # Any node adjacent to both v and w closes a triangle.
        for u in vs & ws:
            triangles.add(tuple(sorted([u, v, w])))

print("{} triangles".format(len(triangles)))

100%|██████████| 131828/131828 [01:49<00:00, 1202.76it/s]

4910076 triangles





In [23]:
# Histogram over triangles: slot k counts the triangles that have exactly k of
# their three edges in `test_entries` (k = 0..3, the "order" of the triangle).
triangle_order_cnt = np.zeros(4)
for t in tqdm(triangles):
    n_test_edges = sum(
        1
        for e in itertools.combinations(t, 2)
        if tuple(sorted(e)) in test_entries
    )
    triangle_order_cnt[n_test_edges] += 1

# Report the histogram as percentages of all triangles.
print('the percetange of 0, 1, 2, 3-order triangles:')
total_triangles = np.sum(triangle_order_cnt)
print(triangle_order_cnt / total_triangles * 100)

100%|██████████| 4910076/4910076 [00:47<00:00, 102312.87it/s]

the percetange of 0, 1, 2, 3-order triangles:
[ 65.28980407  29.83691087   4.62760658   0.24567848]





In [24]:
# For every test edge that lies on at least one triangle, keep a length-4
# counter: slot k is the number of triangles through that edge in which exactly
# k edges belong to `test_entries`. Slot 0 can never be incremented here,
# because a triangle is only credited to the test edges it contains.
edge2triangle_order = defaultdict(lambda: np.zeros(4))
for t in tqdm(triangles):
    test_edges = [
        e
        for e in itertools.combinations(t, 2)
        if tuple(sorted(e)) in test_entries
    ]
    order = len(test_edges)
    for e in test_edges:
        edge2triangle_order[e][order] += 1

# One row per edge, one column per triangle order.
# NOTE(review): this rebinds `m`, which held the sparse matrix loaded earlier
# in the notebook; consider a distinct name if no later cell relies on it.
m = np.array(list(edge2triangle_order.values()))
print(m.shape)

# Column-wise summary statistics across edges.
print('mean of triangle order count on edges: {}'.format(m.mean(axis=0)))
print('median of triangle order count on edges: {}'.format(np.median(m, axis=0)))
print('max of triangle order count on edges: {}'.format(m.max(axis=0)))
print('min of triangle order count on edges: {}'.format(m.min(axis=0)))
                                                                 


100%|██████████| 4910076/4910076 [00:44<00:00, 110105.93it/s]


(65669, 4)
mean of triangle order count on edges: [  0.          22.30908039   6.92012974   0.55108194]
median of triangle order count on edges: [ 0.  9.  2.  0.]
max of triangle order count on edges: [   0.  539.  125.   15.]
min of triangle order count on edges: [ 0.  0.  0.  0.]
