In [1]:
import os
import gc

from prepare_datasets import prepare_primary_school, prepare_workplace, prepare_highschool, prepare_hospital, \
    prepare_moreno_blogs, prepare_moreno_sheep, prepare_moreno_seventh, prepare_petster_hamster, prepare_email_eu
from graphs import load_dataset_to_graph
from graph_to_dataframe import export_training_dataframes

In [2]:
# Prepare datasets -> create weighted edge list .csv and node attributes .csv
#
# Every entry pairs a prepare_* function with the positional arguments it is
# called with. The entries are executed strictly in the order listed.
preparation_steps = [
    (prepare_primary_school,
     ('primary_school', 'primaryschool.csv', 'metadata_primaryschool.txt')),
    (prepare_workplace,
     ('workplace', 'tij_InVS.dat', 'metadata_InVS13.txt')),
    # The same high-school preparer is run twice, once per yearly data set.
    (prepare_highschool,
     ('highschool_2011', 'thiers_2011.csv', 'metadata_2011.txt')),
    (prepare_highschool,
     ('highschool_2012', 'thiers_2012.csv', 'metadata_2012.txt')),
    # The hospital preparer is called with two arguments instead of three.
    (prepare_hospital,
     ('hospital', 'detailed_list_of_contacts_Hospital.dat')),
    (prepare_moreno_blogs,
     ('moreno_blogs', 'out.moreno_blogs_blogs', 'ent.moreno_blogs_blogs.blog.orientation')),
    (prepare_moreno_sheep,
     ('moreno_sheep', 'out.moreno_sheep_sheep', 'ent.moreno_sheep_sheep.sheep.age')),
    (prepare_moreno_seventh,
     ('moreno_seventh', 'out.moreno_seventh_seventh', 'ent.moreno_seventh_seventh.student.gender')),
    (prepare_petster_hamster,
     ('petster-hamster', 'out.petster-hamster', 'ent.petster-hamster')),
    (prepare_email_eu,
     ('email-Eu', 'email-Eu-core.txt', 'email-Eu-core-department-labels.txt')),
]

for prepare, prepare_args in preparation_steps:
    prepare(*prepare_args)

# Marker that the whole preparation cell ran to completion.
print('done')

Preparing primary_school...
Preparing workplace...
Preparing highschool_2011...
Preparing highschool_2012...
Preparing hospital...
Preparing moreno_blogs...
Preparing moreno_sheep...
Preparing moreno_seventh...
Preparing petster-hamster...
Preparing email-Eu...
done


In [3]:
# Prepare dataframes
#
# For every prepared dataset: load it as a graph, export its training
# dataframes, then release the graph before moving on to the next dataset.
prepared_datasets_path = 'prepared_datasets'
prepared_dataframes_path = 'prepared_dataframes'
# Value handed to load_dataset_to_graph as `node_limit`. In the recorded run,
# datasets above this size print "Cutting nodes up to 500".
node_limit = 500
prepared_dataset_names = [
    'primary_school', 'workplace',
    'highschool_2011', 'highschool_2012', 'hospital',
    'moreno_blogs', 'moreno_sheep', 'moreno_seventh',
    # big datasets
    'petster-hamster', 'email-Eu'
]


for dataset_name in prepared_dataset_names:
    print('Processing {}'.format(dataset_name))
    # Input directory of the prepared dataset and output directory for its dataframes.
    dataset_path = os.path.join(prepared_datasets_path, dataset_name)
    df_dirpath = os.path.join(prepared_dataframes_path, dataset_name)

    graph = load_dataset_to_graph(dataset_path, node_limit=node_limit)
    export_training_dataframes(graph, dataset_path, df_dirpath)

    # Unbind the graph BEFORE collecting: while `graph` still references it,
    # gc.collect() cannot reclaim it and it would stay alive during the next
    # dataset's load, so two graphs would be held in memory at once.
    del graph
    gc.collect()
    

Processing primary_school
Processing workplace
Processing highschool_2011
Processing highschool_2012
Processing hospital
Processing moreno_blogs
Cutting nodes up to 500
Processing moreno_sheep
Processing moreno_seventh
Processing petster-hamster
Cutting nodes up to 500
Processing email-Eu
Cutting nodes up to 500
