In [1]:
import os

from graphs import load_dataset_to_graph, attach_graph_attributes, attach_real_attributes
from measurements import get_graph_measurements, compare_graph_measurements, print_comparison_results
from recreate_graph import graph_to_training_dataframe, preprocess_dataframe, get_trained_model, recreate_by_priority_rank

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Directory under which each dataset name below is resolved to a path.
prepared_datasets_path = 'prepared_datasets'

# Datasets run through the pipeline below, in this order.
prepared_dataset_names = [
    'primary_school',
    'workplace',
    'highschool_2011',
    'highschool_2012',
    'hospital',
    'moreno_blogs',
    'moreno_sheep',
    'moreno_seventh',
]

# Declared for reference only: the loop in this cell does not iterate over it.
prepare_big_dataset_names = ['petster-hamster', 'email-Eu']

# Per-dataset pipeline: load -> attach attributes -> build training frame ->
# train -> recreate the graph -> compare original and recreated measurements.
for dataset_name in prepared_dataset_names:
    print('\nProcessing {}'.format(dataset_name))
    dataset_path = os.path.join(prepared_datasets_path, dataset_name)

    # --- Graph construction -------------------------------------------------
    print('Loading graph...')
    graph = load_dataset_to_graph(dataset_path)

    print('Attaching graph attributes...')
    attach_graph_attributes(graph)

    print('Attaching real attributes...')
    attach_real_attributes(graph, dataset_path)

    # --- Training data ------------------------------------------------------
    print('Converting to dataframe...')
    df = graph_to_training_dataframe(graph)

    print('Preprocessing dataframe')
    node_count = graph.number_of_nodes()
    df = preprocess_dataframe(df, node_count)

    # --- Model fitting and graph recreation ---------------------------------
    print('Training model...')
    model = get_trained_model(df, epochs=4)

    print('Recreating graph...')
    new_graph = recreate_by_priority_rank(graph, df, model)

    # --- Evaluation ---------------------------------------------------------
    # Measure the original graph first, then the recreated one, and print the
    # comparison between the two sets of measurements.
    print('Comparing graphs...')
    graph_measurements = get_graph_measurements(graph)
    new_graph_measurements = get_graph_measurements(new_graph)
    comparison = compare_graph_measurements(
        graph_measurements,
        new_graph_measurements,
    )
    print_comparison_results(comparison)


Processing primary_school
Loading graph...
Attaching graph attributes...
Attaching real attributes...
Converting to dataframe...
Preprocessing dataframe
Training model...
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
loss: 0.0011222473787267692, accuracy: 0.7159688545864353
Recreating graph...
Comparing graphs...
(KS test p-value)  degree_centrality: 7.75774287285543e-05            [passed: False]
(KS test p-value)  closeness_centrality: 3.4620957144498103e-06       [passed: False]
(KS test p-value)  betweenness_centrality: 0.003210231886294323       [passed: False]
(KS test p-value)  pagerank: 3.4557936477529565e-08                   [passed: False]
(abs distance)     average_shortest_path_length: 0.010104908946951702 [passed: True]
(abs distance)     diameter: 0.0                                      [passed: True]
(abs distance)     degree_centralization: 0.33710572290810414         [passed: False]
(abs distance)     closeness_centralization: 0.682831343407577        [passed: False]
(abs

  return (xy*(M-ab)).sum()/numpy.sqrt(vara*varb)


(KS test p-value)  degree_centrality: 0.0007516359865883434           [passed: False]
(KS test p-value)  closeness_centrality: 2.641725814496577e-05        [passed: False]
(KS test p-value)  betweenness_centrality: 7.5885154889784956e-06     [passed: False]
(KS test p-value)  pagerank: 0.005044986488469954                     [passed: False]
(abs distance)     average_shortest_path_length: 0.06818329965452052  [passed: True]
(abs distance)     diameter: 0.25                                     [passed: False]
(abs distance)     degree_centralization: 0.5503878566522026          [passed: False]
(abs distance)     closeness_centralization: 0.6060142896335171       [passed: False]
(abs distance)     betweenness_centralization: 0.0649911071000287     [passed: True]
(abs distance)     pagerank_centralization: 0.17881355858735276       [passed: False]
(abs distance)     density: 0.004972214097689335                      [passed: True]
(abs distance)     degree_assortativity: inf             

  return (xy*(M-ab)).sum()/numpy.sqrt(vara*varb)


(KS test p-value)  degree_centrality: 3.8616274725431134e-06          [passed: False]
(KS test p-value)  closeness_centrality: 2.2191981273387013e-06       [passed: False]
(KS test p-value)  betweenness_centrality: 0.0057724348889359985      [passed: False]
(KS test p-value)  pagerank: 1.899903318764338e-05                    [passed: False]
(abs distance)     average_shortest_path_length: 0.06050578034682078  [passed: True]
(abs distance)     diameter: 0.25                                     [passed: False]
(abs distance)     degree_centralization: 0.5029688273132129          [passed: False]
(abs distance)     closeness_centralization: 0.6322009844566814       [passed: False]
(abs distance)     betweenness_centralization: 0.03534254510806026    [passed: True]
(abs distance)     pagerank_centralization: 0.17007535262818982       [passed: False]
(abs distance)     density: 0.0135135135135135                        [passed: True]
(abs distance)     degree_assortativity: nan             

  return (xy*(M-ab)).sum()/numpy.sqrt(vara*varb)


(KS test p-value)  degree_centrality: 0.004275811717703132            [passed: False]
(KS test p-value)  closeness_centrality: 0.36785723773857243          [passed: True]
(KS test p-value)  betweenness_centrality: 5.10195346766571e-07       [passed: False]
(KS test p-value)  pagerank: 3.583793931914375e-05                    [passed: False]
(abs distance)     average_shortest_path_length: 0.0014659449706810855 [passed: True]
(abs distance)     diameter: 0.3333333333333333                       [passed: False]
(abs distance)     degree_centralization: 0.1972137570744445          [passed: False]
(abs distance)     closeness_centralization: 0.21148555998247284      [passed: False]
(abs distance)     betweenness_centralization: 0.2037489535461097     [passed: False]
(abs distance)     pagerank_centralization: 0.18072138109284167       [passed: False]
(abs distance)     density: 0.012291483757682187                      [passed: True]
(abs distance)     degree_assortativity: nan            