In [4]:
import os

import pandas as pd

from graphs import load_dataset_to_graph
from measurements import get_graph_measurements, collect_graph_measurements, MEASUREMENTS
from recreate_graph import recreate_by_priority_rank_random_rankings, \
    recreate_by_priority_rank, get_trained_model

In [2]:
# --- Input locations -------------------------------------------------------
# Directory with one entry per dataset; each entry is handed to
# load_dataset_to_graph by the processing loop.
prepared_datasets_path = 'prepared_datasets'
# Directory with one sub-directory of CSV dataframes per dataset.
prepared_dataframes_path = 'prepared_dataframes'

# --- Output location -------------------------------------------------------
# Root directory; a sub-directory per dataset is created under it.
comparison_dir = 'results'

# Field separator used both when reading dataframes and writing results.
delimiter = '\t'

# How many times each graph is recreated for every analysis type.
number_of_comparisons = 10

# Names of the datasets to process, in processing order.
prepared_dataset_names = [
    'primary_school',
    'workplace',
    'highschool_2011',
    'highschool_2012',
    'hospital',
    'moreno_blogs',
    'moreno_sheep',
    'moreno_seventh',
    # big datasets
    'petster-hamster',
    'email-Eu',
]

In [3]:
def _measure_recreations(recreate, recreate_args, original_measurements, repetitions):
    """Recreate a graph several times and compare each result with the original.

    Args:
        recreate: callable that builds and returns a recreated graph.
        recreate_args: tuple of positional arguments passed to ``recreate``
            on every repetition.
        original_measurements: result of ``get_graph_measurements`` for the
            original graph.
        repetitions: number of recreations to perform.

    Returns:
        A list with one ``collect_graph_measurements`` result per recreation.
    """
    comparisons = []
    for i in range(repetitions):
        print('Recreation number {}...'.format(i))
        new_graph = recreate(*recreate_args)
        new_graph_measurements = get_graph_measurements(new_graph)
        comparisons.append(
            collect_graph_measurements(original_measurements, new_graph_measurements))
    return comparisons


# For every dataset: measure the original graph, recreate it under five
# different ranking sources, and write one CSV of comparison measurements per
# analysis plus one CSV with the original graph's scalar measurements.
for dataset_name in prepared_dataset_names:
    print('Processing {}...'.format(dataset_name))
    # paths
    dataset_path = os.path.join(prepared_datasets_path, dataset_name)
    dataframe_path = os.path.join(prepared_dataframes_path, dataset_name)
    results_dir = os.path.join(comparison_dir, dataset_name)
    original_file = os.path.join(results_dir, 'original.csv')

    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    # load original graph and get measurements
    graph = load_dataset_to_graph(dataset_path)
    graph_measurements = get_graph_measurements(graph)

    # make 5 types of analysis
    for analysis_name in ['random_rankings', 'graph_attrs', 'real_attrs', 'graph_real_attrs', 'real_y']:
        print('{}'.format(analysis_name))
        results_file = os.path.join(results_dir, analysis_name + '.csv')

        # handle random rankings case: no dataframe or model is needed
        if analysis_name == 'random_rankings':
            results = _measure_recreations(
                recreate_by_priority_rank_random_rankings, (graph,),
                graph_measurements, number_of_comparisons)
        # handle real y case: rank by the true target column
        elif analysis_name == 'real_y':
            df_path = os.path.join(dataframe_path, 'no_attrs.csv')
            df = pd.read_csv(df_path, delimiter=delimiter)

            # Series.as_matrix() was removed in pandas 1.0; .values returns the
            # same ndarray and exists in every pandas release. It is extracted
            # once because it does not change between repetitions.
            real_y = df['num_of_edges'].values
            results = _measure_recreations(
                recreate_by_priority_rank, (graph, real_y),
                graph_measurements, number_of_comparisons)
        # handle graph attrs, real attrs, graph and real attrs cases
        else:
            df_path = os.path.join(dataframe_path, analysis_name + '.csv')
            df = pd.read_csv(df_path, delimiter=delimiter)

            model = get_trained_model(df, epochs=16)
            # drop target column
            # NOTE(review): these are the same rows that were passed to
            # get_trained_model above, despite the "test" name - confirm
            # this is intended.
            X_test = df.drop(['num_of_edges'], axis=1)
            # predict num_edges
            y_pred = model.predict(X_test)

            results = _measure_recreations(
                recreate_by_priority_rank, (graph, y_pred),
                graph_measurements, number_of_comparisons)

        # save measurements of recreated graphs
        results_df = pd.DataFrame(results)
        results_df.to_csv(results_file, index=False, sep=delimiter)

    # save original graph scalar measurements
    # (only the entries that MEASUREMENTS marks as 'value')
    scalar_measurements = {k: v for k, v in graph_measurements.items() if MEASUREMENTS[k] == 'value'}
    orig_df = pd.DataFrame([scalar_measurements])
    orig_df.to_csv(original_file, index=False, sep=delimiter)

Processing primary_school...
random_rankings
Recreation number 0...


  return (xy*(M-ab)).sum()/numpy.sqrt(vara*varb)


Recreation number 1...
Recreation number 2...


  return (xy*(M-ab)).sum()/numpy.sqrt(vara*varb)


Recreation number 3...
Recreation number 4...
Recreation number 5...
Recreation number 6...
Recreation number 7...
Recreation number 8...
Recreation number 9...
graph_attrs
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
loss: 0.0007454208608016859, accuracy: 0.7159688545864353
Recreation number 0...
Recreation number 1...
Recreation number 2...
Recreation number 3...
Recreation number 4...


  return (xy*(M-ab)).sum()/numpy.sqrt(vara*varb)


Recreation number 5...
Recreation number 6...
Recreation number 7...
Recreation number 8...
Recreation number 9...
real_attrs
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
loss: 0.0005565515670962711, accuracy: 0.7159688545864353
Recreation number 0...
Recreation number 1...
Recreation number 2...
Recreation number 3...
Recreation number 4...
Recreation number 5...
Recreation number 6...
Recreation number 7...
Recreation number 8...
Recreation number 9...
graph_real_attrs
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
loss: 0.0005094060769552162, accuracy: 0.7159688545864353
Recreation number 0...
Recreation number 1...
Recreation number 2...
Recreation number 3...
Recreation number 4...
Recreation number 5...
Recreat

Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
loss: 0.0007644882959438878, accuracy: 0.7846434870244394
Recreation number 0...
Recreation number 1...
Recreation number 2...
Recreation number 3...
Recreation number 4...
Recreation number 5...
Recreation number 6...
Recreation number 7...
Recreation number 8...
Recreation number 9...
real_y
Recreation number 0...
Recreation number 1...
Recreation number 2...
Recreation number 3...
Recreation number 4...
Recreation number 5...
Recreation number 6...
Recreation number 7...
Recreation number 8...
Recreation number 9...
Processing highschool_2012...
random_rankings
Recreation number 0...
Recreation number 1...
Recreation number 2...
Recreation number 3...
Recreation number 4...
Recreation number 5...
Recreation number 6...
Recreation number 7...
Recreation number 8...
Recreation number 9...
graph_attrs
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoc

Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
loss: 0.009804297880810966, accuracy: 0.9888055555555556
Recreation number 0...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 1...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 2...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 3...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 4...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 5...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 6...
Cannot compute diameter - Found infinite path length because the digraph is not st

Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
real_y
Recreation number 0...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 1...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 2...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 3...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 4...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 5...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 6...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 7...
Cannot compute di

Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
loss: 0.17138796417523225, accuracy: 0.5529131986794376
Recreation number 0...
Recreation number 1...
Recreation number 2...
Recreation number 3...
Recreation number 4...
Recreation number 5...
Recreation number 6...
Recreation number 7...
Recreation number 8...
Recreation number 9...
real_attrs
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
loss: 0.14136920406900602, accuracy: 0.5529131986794376
Recreation number 0...
Recreation number 1...
Recreation number 2...
Recreation number 3...
Recreation number 4...
Recreation number 5...
Recreation number 6...
Recreation number 7...
Recreation number 8...
Recreation number 9...
graph_real_attrs
Epoch 1/16
Epoch 2/16
Epoch 3/16
E

Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 1...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 2...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 3...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 4...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 5...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 6...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 7...
Cannot compute diameter - Found infinite path length because the digraph is not strongly connected
Recreation number 8...
Cannot compute diameter 