In [38]:
from generator import *
from NetEMD_shiwen import *
import numpy as np
import community
import networkx as nx
from utils import generate_null_model, comm_eigenvectors, partition_graph
from scipy.stats import norm
from com_detection import augmentation
import logging

In [14]:
graph = ER_generator(n=1000, seed=None)
graph = draw_anomalies(graph)

Adding 7 anomalies...
The 1th anomaly...
0
15
Adding rings...
The 2th anomaly...
15
20
Adding paths...
[182 964 721  63 521]
The 3th anomaly...
20
25
Adding rings...
The 4th anomaly...
25
38
Adding rings...
The 5th anomaly...
38
46
Adding stars...
The 6th anomaly...
46
60
Adding stars...
The 7th anomaly...
60
73
Adding stars...


In [42]:
def compute_strength(g, strength_type=None, normalize=False):
    if strength_type == 'in':
        strength = dict(g.in_degree(weight='weight'))
    elif strength_type == 'out':
        strength = dict(g.out_degree(weight='weight'))
    else:
        strength = dict(g.degree(weight='weight'))
    if normalize:
        values = np.array(list(strength.values()))
        normed = values / np.std(values)
    return {node: v for node, v in zip(strength.keys(), normed)}

In [47]:
def compute_strength_score(g, references, null_samples):
    """Compute one statistics on a graph(subgraph)
    """
    # in strength
    obs_stat = compute_strength(g, 'in')
    ref_stats = [compute_strength(ref, 'in', normalize=True) for ref in references]
    null_stats = [compute_strength(n_samp, 'in', normalize=True) for n_samp in null_samples]
    try:
        in_strength_1, in_strength_2 = NetEMD_score(obs_stat, ref_stats, null_stats)
    except:
        print(obs_stat)
    # out_strength
    obs_stat = compute_strength(g, 'out')
    ref_stats = [compute_strength(ref, 'out', normalize=True) for ref in references]
    null_stats = [compute_strength(n_samp, 'out', normalize=True) for n_samp in null_samples]
    out_strength_1, out_strength_2 = NetEMD_score(obs_stat, ref_stats, null_stats)
    # in_out_strength
    obs_stat = compute_strength(g)
    ref_stats = [compute_strength(ref, normalize=True) for ref in references]
    null_stats = [compute_strength(n_samp, normalize=True) for n_samp in null_samples]
    in_out_strength_1, in_out_strength_2 = NetEMD_score(obs_stat, ref_stats, null_stats)
    
    return in_strength_1, in_strength_2, out_strength_1, out_strength_2, in_out_strength_1, in_out_strength_2

In [48]:
def NetEMD_features(graph, num_references=15, num_samples=500, n=10000, p=0.001):
    global stats
    logging.info("partition graph")
    communities = [graph.subgraph(comm_nodes) for comm_nodes in partition_graph(graph)]
    logging.info("got {} communities".format(len(communities)))
    logging.info("generating references")
    references = generate_null_model(num_models=num_references, min_size=5, n=n, p=p)
    logging.info("generating null samples")
    null_samples = generate_null_model(num_models=num_samples, min_size=5, n=n, p=p)
    for comm_idx, community in enumerate(communities):
        logging.info("computing strength scores for community No.{}".format(comm_idx))
        strength_scores = compute_strength_score(community, references, null_samples)
        strength_names = ['in_strength_1', 'in_strength_2', 'out_strength_1', 'out_strength_2', 'in_out_strength_1', 'in_out_strength_2']
        for strength_name, strength_score in zip(strength_names, strength_scores):
            for node, score in strength_score.items():
                assert graph[node].get(strength_name) is None
                graph[node][strength_name] = score
        logging.info("computing motif scores for community No.{}".format(comm_idx))
        motif_scores = compute_motif_score(community, references, null_samples)
        for idx in range(13):
            motif_score = motif_scores[idx]
            motif_id = idx + 4
            for score_idx in [1, 2]:
                for node, score in motif_score[score_idx-1].items():
                    assert graph[node].get('motif_{}_{}'.format(motif_id, score_idx)) is None
                    graph[node]['motif_{}_{}'.format(motif_id, score_idx)] = score
    logging.info("generating augmented graph")
    graph_aug = augmentation(graph)
    logging.info("partition augmented graph")
    communities_aug = [graph_aug.subgraph(comm_nodes) for comm_nodes in partition_graph(graph_aug)]
    logging.info("get {} augmented communities".format(len(communities_aug)))
    logging.info("generating augmented refrences")
    references_aug = generate_null_model(num_models=num_references, min_size=5, n=n, p=p, augment=True)
    logging.info("generating augmented null samples")
    null_samples_aug = generate_null_model(num_models=num_samples, min_size=5, n=n, p=p, augment=True)
    matrix_names = ['upper', 'lower', 'comb', 'rw']
    for comm_idx, community in enumerate(communities_aug):
        logging.info("computing matrix scores for community No.{}".format(comm_idx))
        matrix_scores = compute_matrix_score(community, references_aug, null_samples_aug) # 4 tuples of (score1, score2)
        for matrix_idx, matrix_name in enumerate(matrix_names):
            for node in community.nodes():
                graph[node]['{}_1'.format(matrix_name)] = matrix_scores[matrix_idx][0][node]
                graph[node]['{}_2'.format(matrix_name)] = matrix_scores[matrix_idx][1][node]
    
    return graph

In [49]:
graph = NetEMD_features(graph, num_references=2, num_samples=2, n=500)

root - INFO - partition graph
root - INFO - got 51 communities
root - INFO - generating references
root - INFO - Generating 1-th null model
root - INFO - Partitioning graph
root - INFO - Generating 2-th null model
root - INFO - Partitioning graph
root - INFO - generating null samples
root - INFO - Generating 1-th null model
root - INFO - Partitioning graph
root - INFO - Generating 2-th null model
root - INFO - Partitioning graph
root - INFO - computing strength scores for community No.0


None


AttributeError: 'NoneType' object has no attribute 'values'