In [1]:
%matplotlib notebook

In [2]:
import numpy as np
import os
import networkx as nx
import seaborn as sns


from numpy.linalg import eigh
from scipy import sparse as sp
from scipy.sparse import issparse
from matplotlib import pyplot as plt
from collections import defaultdict
from functools import reduce

from sklearn.cluster import KMeans
from scipy.sparse.linalg import eigs

from helpers import (
    signed_layout, draw_nodes, draw_edges, flatten, 
    dict2array, signed_group_conductance,
    conductance_by_sweeping,
    get_borderless_fig, 
    labels2groups,
    num_ccs, cc_sizes
) 
from sweeping import sweeping_scores_using_ppr
from motif_adjacency import motif_eef_anchored, motif_fff, motif_ff

# graph transformation

In [3]:
# Toy signed triangle: nodes 0 and 1 share a positive edge, and both are
# connected to node 2 through negative edges.
g = nx.Graph()
g.add_edges_from(
    (u, v, {'sign': sign})
    for u, v, sign in [(0, 1, 1), (1, 2, -1), (0, 2, -1)]
)

In [4]:
# Signed adjacency matrix of the toy graph; entries come from the 'sign' edge
# attribute. The dense form is displayed for inspection.
A = nx.adjacency_matrix(g, weight='sign')
A.todense()

matrix([[ 0,  1, -1],
        [ 1,  0, -1],
        [-1, -1,  0]], dtype=int64)

In [5]:
# Adjacency matrix the transformation is expected to produce for the toy graph:
# the positive edge (0, 1) is kept, negative edges are dropped, and each
# diagonal entry equals the number of negative edges incident to that node.
expected = np.array([    
    [1, 1, 0],
    [1, 1, 0],
    [0, 0, 2]
])

In [6]:
def transform_via_selfloop_rewiring(g):
    """
    Build an unsigned graph from a signed one by turning negative edges into self-loops.

    The signed adjacency matrix is read from the 'sign' edge attribute of `g`.
    Negative entries are zeroed out, and a diagonal matrix holding the per-column
    count of negative entries is added on top of the remaining entries.

    Returns the graph built from the resulting sparse matrix. Nodes of the
    returned graph are indexed by matrix row/column, not by the labels of `g`.

    NOTE(review): nx.from_scipy_sparse_matrix is not available in NetworkX >= 3.0
    (from_scipy_sparse_array replaces it) -- confirm the pinned version.
    """
    signed_adj = nx.adjacency_matrix(g, weight='sign')
    negative_mask = signed_adj < 0

    # Keep only the non-negative part of the adjacency structure.
    positive_part = signed_adj.copy()
    positive_part[negative_mask] = 0
    positive_part.eliminate_zeros()

    # One self-loop per node, weighted by how many negative edges it had.
    # `flatten` is a project helper; presumably it turns the 1 x n sum into a flat sequence.
    self_loops = sp.diags(flatten(negative_mask.sum(axis=0)))

    return nx.from_scipy_sparse_matrix(positive_part + self_loops)

In [7]:
# Apply the self-loop rewiring to the toy graph.
g_new = transform_via_selfloop_rewiring(g)

In [8]:
# Sanity check: the transformed graph's adjacency matrix must equal `expected` entry by entry.
assert (nx.adjacency_matrix(g_new).todense() == expected).all()

# experiment on synthetic graphs

In [36]:
# Drop the toy graphs so the experiment cells below start from fresh names.
del g, g_new

In [108]:
import pandas as pd
from tqdm import tqdm
from joblib import Parallel, delayed

from helpers import signed_layout, draw_nodes, draw_edges
from graph_generator.community_graph import make
from sweeping import sweeping_scores_using_ppr
from sklearn.metrics import precision_recall_fscore_support
from helpers import make_range




In [109]:
def noisy_level(g):
    """
    Return one minus the mean of the 'label' edge attribute over all edges of `g`.

    `g` must support `g.edges()` and `g[u][v]['label']`. If the labels are 0/1
    indicators, the result is the fraction of edges labelled 0
    (presumably the "noisy" ones -- confirm against the graph generator).
    """
    labels = np.array([g[src][dst]['label'] for src, dst in g.edges()])
    n_edges = labels.shape[0]
    return 1 - labels.sum() / n_edges

In [139]:
# NOTE: this cell relies on `run_experiment` and `alpha`, which are defined in a
# cell further down the notebook; on a fresh kernel that cell must run first.
g, groundtruth = make(25, 10, 0.8, 0.1, 0.2, 1-0.2)
node2comm = {n: c for c, comm in enumerate(groundtruth) for n in comm}
g_new = transform_via_selfloop_rewiring(g)

# Sweep around query node 0 twice on the same graph, once per conductance
# measure, keeping the full sweep profile (debug=True) for plotting.
_, _, _, sweep_positions, sweep_scores_signed = run_experiment(
    g, g_new, 0, alpha, groundtruth, node2comm, verbose=True, debug=True, 
    use_signed_conductance=True
)

_, _, _, sweep_positions, sweep_scores_unsigned = run_experiment(
    g, g_new, 0, alpha, groundtruth, node2comm, verbose=True, debug=True, 
    use_signed_conductance=False
)


fig, ax = plt.subplots(1, 1)
ax.plot(sweep_positions, sweep_scores_signed)
ax.plot(sweep_positions, sweep_scores_unsigned)

ax.set_title('sweep profile plot')
# Raw strings: '\p' is not a valid escape sequence in a normal string literal
# and triggers a warning; the rendered label text is unchanged.
# The trailing semicolon keeps the Legend repr out of the cell output.
ax.legend([r'signed $\phi$', r'unsigned $\phi$'], loc='best');

  


<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7fb4745a6a90>

In [119]:
# Passed as `alpha` to sweeping_scores_using_ppr via run_experiment, and read as
# a global by experiment_under_config.
# NOTE(review): presumably the personalized-PageRank damping/teleport parameter -- confirm in sweeping.py.
alpha = 0.85

def run_experiment(
    g, g_new, query, alpha, groundtruth, node2comm, verbose=False, debug=False,
    use_signed_conductance=True
):
    """
    Sweep a PPR ordering around `query` and score the best cut against the ground truth.

    Usage:
        prec, rec, f1 = run_experiment(g, g_new, q, alpha, groundtruth, node2comm)

    Parameters
    ----------
    g : graph with a 'sign' edge attribute; only read when
        `use_signed_conductance` is true (to build `signed_A`).
    g_new : graph handed to `sweeping_scores_using_ppr`; its adjacency matrix is passed as `A`.
    query : seed node of the sweep.
    alpha : forwarded unchanged to `sweeping_scores_using_ppr`.
    groundtruth : indexable by community id; each entry is used as a numpy
        index into a per-node vector, so nodes are assumed to be 0..n-1.
    node2comm : mapping from node to its community id.
    verbose : accepted for backward compatibility; not used.
    debug : when true, the sweep positions and scores are returned as well.
    use_signed_conductance : selects conductance_measure='signed' or 'unsigned'.

    Returns
    -------
    (prec, rec, f1) of the positive class (membership in the query's
    community), followed by (sweep_positions, sweep_scores) when `debug` is true.
    """
    # Both measures share the same call; only the keyword arguments differ.
    sweep_kwargs = dict(A=nx.adjacency_matrix(g_new))
    if use_signed_conductance:
        sweep_kwargs.update(
            signed_A=nx.adjacency_matrix(g, weight='sign'),
            conductance_measure='signed',
        )
    else:
        sweep_kwargs.update(conductance_measure='unsigned')

    order, sweep_positions, sweep_scores = sweeping_scores_using_ppr(
        g_new, query, alpha, **sweep_kwargs
    )

    # NOTE(review): assumes sweep_scores[i] scores the prefix order[:i+1] -- confirm in sweeping.py.
    best_pos = np.argmin(sweep_scores)

    n_nodes = g_new.number_of_nodes()
    pred_comm = np.zeros(n_nodes)
    pred_comm[order[:best_pos+1]] = 1

    true_comm = np.zeros(n_nodes)
    true_comm[groundtruth[node2comm[query]]] = 1

    # Pin the label set: without it, a run where both vectors contain a single
    # class yields length-1 arrays and indexing the positive class at [1] fails.
    prec, rec, f1, _ = precision_recall_fscore_support(
        true_comm, pred_comm, labels=[0, 1], average=None
    )
    if debug:
        return prec[1], rec[1], f1[1], sweep_positions, sweep_scores
    return prec[1], rec[1], f1[1]

In [118]:
def experiment_under_config(
    internal_neg_ratio = 0.2, external_pos_ratio = 0.1, rep=10,
    use_signed_conductance=True
):
    """
    Repeat the community-recovery experiment `rep` times for one noise setting.

    Each repetition generates a graph with `make`, rewires it with
    `transform_via_selfloop_rewiring`, and calls `run_experiment` once per node
    of the generated graph as the query. The global `alpha` is used for every run.

    Returns
    -------
    (summary, runtime) where `summary` is `DataFrame.describe()` over all
    per-query (prec, rec, f1) triples and `runtime` is a dict echoing the
    configuration plus the `noisy_level` of each generated graph.
    """
    per_query_scores = []
    noise_per_graph = []

    for _ in tqdm(range(rep), total=rep):
        # NOTE(review): positional meaning of the `make` arguments is not visible here;
        # the two ratios are passed in 4th and (as 1 - ratio) 6th position.
        g, groundtruth = make(25, 10, 0.8, internal_neg_ratio, 0.2, 1-external_pos_ratio)

        node2comm = {}
        for comm_id, members in enumerate(groundtruth):
            for node in members:
                node2comm[node] = comm_id

        g_new = transform_via_selfloop_rewiring(g)

        for q in g.nodes():
            per_query_scores.append(
                run_experiment(g, g_new, q, alpha, groundtruth, node2comm,
                               use_signed_conductance=use_signed_conductance)
            )

        noise_per_graph.append(noisy_level(g))

    summary = pd.DataFrame(per_query_scores, columns=['prec', 'rec', 'f1']).describe()
    runtime = {
        'internal_neg_ratio': internal_neg_ratio,
        'external_pos_ratio': external_pos_ratio,
        'rep': rep,
        'noisy_levels': noise_per_graph,
    }
    return summary, runtime

In [120]:
# Run the full parameter grid once per conductance measure so that both result
# files read by the comparison cell exist after a fresh "Restart & Run All"
# (the original cell had to be edited and re-run by hand to get the signed file).
# The unsigned run goes last, so `use_signed_conductance`, `all_results`, `rows`
# and `eval_df` end up in the state the original single run left them in.
# This doubles the runtime of the cell.
os.makedirs('outputs', exist_ok=True)  # to_pickle does not create missing directories
for use_signed_conductance in (True, False):
    all_results = Parallel(n_jobs=-1)(
        delayed(experiment_under_config)(internal_neg_ratio=inr, external_pos_ratio=epr, rep=10,
                                         use_signed_conductance=use_signed_conductance)
        for inr in make_range(0.1, 0.5)
        for epr in make_range(0.1, 0.5)
    )
    # One row per grid point: mean scores plus the configuration and the mean noise level.
    rows = []
    for summ, runtime in all_results:
        rows.append(
            (summ['prec']['mean'], summ['rec']['mean'], summ['f1']['mean'], runtime['external_pos_ratio'], runtime['internal_neg_ratio'], np.mean(runtime['noisy_levels']))
        )
    eval_df = pd.DataFrame(rows, columns=['prec', 'rec', 'f1', 'ext_pos_ratio', 'int_neg_ratio', 'noise_level'])
    eval_df.to_pickle('outputs/eval_results_community_graph_n25_k10_signed={}.pkl'.format(use_signed_conductance))

100%|██████████| 10/10 [01:33<00:00,  9.42s/it]
100%|██████████| 10/10 [01:37<00:00,  9.68s/it]
100%|██████████| 10/10 [01:49<00:00, 10.87s/it]
100%|██████████| 10/10 [01:52<00:00, 11.36s/it]
100%|██████████| 10/10 [02:02<00:00, 12.21s/it]
100%|██████████| 10/10 [02:10<00:00, 13.09s/it]
100%|██████████| 10/10 [02:27<00:00, 14.78s/it]
100%|██████████| 10/10 [02:39<00:00, 16.22s/it]
100%|██████████| 10/10 [01:22<00:00,  8.34s/it]
100%|██████████| 10/10 [01:37<00:00,  9.83s/it]
100%|██████████| 10/10 [01:17<00:00,  7.82s/it]
100%|██████████| 10/10 [02:28<00:00, 14.82s/it]
100%|██████████| 10/10 [02:01<00:00, 12.02s/it]
100%|██████████| 10/10 [02:36<00:00, 15.64s/it]
100%|██████████| 10/10 [02:15<00:00, 13.56s/it]
100%|██████████| 10/10 [01:34<00:00,  9.36s/it]
100%|██████████| 10/10 [02:31<00:00, 15.36s/it]
100%|██████████| 10/10 [01:12<00:00,  7.20s/it]
100%|██████████| 10/10 [01:51<00:00, 11.19s/it]
100%|██████████| 10/10 [01:31<00:00,  9.20s/it]
100%|██████████| 10/10 [02:08<00:00, 13.

In [121]:
# Load the per-configuration summaries written by the grid-run cell, one file
# per conductance measure (columns: prec, rec, f1, ext_pos_ratio, int_neg_ratio, noise_level).
df_signed = pd.read_pickle('outputs/eval_results_community_graph_n25_k10_signed=True.pkl')
df_unsigned = pd.read_pickle('outputs/eval_results_community_graph_n25_k10_signed=False.pkl')

In [134]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import numpy as np

# Metric column shown on the z axis; the result frames carry 'prec', 'rec' and 'f1'.
aspect = 'f1'
fig = plt.figure()
# Passing keyword arguments to fig.gca() was deprecated in Matplotlib 3.4 and
# removed in later releases; add_subplot accepts the projection on old and new versions.
ax = fig.add_subplot(111, projection='3d')
# Signed-conductance results use the default surface colour, unsigned results are red.
ax.plot_trisurf(df_signed['ext_pos_ratio'], df_signed['int_neg_ratio'], df_signed[aspect])
ax.plot_trisurf(df_unsigned['ext_pos_ratio'], df_unsigned['int_neg_ratio'], df_unsigned[aspect], color='red')
ax.set_xlabel('ext. pos.')
ax.set_ylabel('int. neg.')
# ax.legend(['sweep on signed conductance', 'sweep on conductance'])
# The trailing semicolon keeps the Text repr out of the cell output.
ax.set_zlabel(aspect);

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x7fb45d2fbf98>