In [None]:
import sys
import time
import json
from pprint import pprint

import pandas as pd

from neo4j import GraphDatabase
from neo4j.exceptions import ClientError
from dask import delayed, compute
import multiprocessing.popen_spawn_posix
from dask.distributed import Client

import helpers.graph_services as gs
import helpers.helper as helper

In [None]:
def check_if_graph_exists(neo4j_connection, graph_name):
    """Ask GDS whether a named graph is present in the graph catalog.

    Args:
        neo4j_connection: Object exposing ``run_single_query(query)``.
        graph_name: Catalog name of the graph. It is placed verbatim between
            single quotes in the Cypher text.

    Returns:
        Whatever ``run_single_query`` returns for the
        ``CALL gds.graph.exists(...)`` statement, unmodified.
    """
    return neo4j_connection.run_single_query(
        f"CALL gds.graph.exists('{graph_name}')"
    )

def get_graph_information(neo4j_connection, graph_name):
    """Fetch the GDS catalog entry for a named graph.

    Args:
        neo4j_connection: Object exposing ``run_single_query(query)``.
        graph_name: Catalog name of the graph. It is placed verbatim between
            single quotes in the Cypher text.

    Returns:
        Whatever ``run_single_query`` returns for the
        ``CALL gds.graph.list(...)`` statement, unmodified.
    """
    return neo4j_connection.run_single_query(
        f"CALL gds.graph.list('{graph_name}')"
    )

def create_graph(neo4j_connection, graph_name):
    """Create the named GDS graph projection over the TWITCH_USER nodes.

    The projection keeps nodes labelled ``TWITCH_USER`` with their ``days``
    and ``views`` properties and uses ``'*'`` as the relationship projection.

    Args:
        neo4j_connection: Object exposing ``run_single_query(query)``.
        graph_name: Catalog name for the new graph. It is placed verbatim
            between single quotes in the Cypher text.

    Returns:
        Whatever ``run_single_query`` returns for the
        ``CALL gds.graph.create(...)`` statement, unmodified.
    """
    # Spelled out line by line so the whitespace sent to the server is explicit.
    statement_lines = [
        "",
        "    CALL gds.graph.create(",
        f"      '{graph_name}', ",
        "      {",
        "        TWITCH_USER: {",
        "            label:'TWITCH_USER', ",
        "            properties: ['days', 'views']",
        "            }",
        "      }, ",
        "      '*'",
        "    )",
        "    ",
    ]
    query = "\n".join(statement_lines)
    return neo4j_connection.run_single_query(query)

def get_gds_query(graph_name, algorithm_name, execution_mode="stream", properties=None, estimate=False):
    """Build the Cypher ``CALL`` statement for a GDS algorithm procedure.

    Args:
        graph_name: Catalog name of the graph, placed between single quotes.
        algorithm_name: Procedure name segment that follows ``gds.``.
        execution_mode: Procedure mode segment; defaults to ``"stream"``.
        properties: Algorithm configuration, rendered to text by
            ``helper.convert_dict_to_string``. ``None`` (the default) means
            an empty configuration.
        estimate: When true, target the ``.estimate`` variant of the procedure.

    Returns:
        The query text in the form
        ``CALL gds.<algorithm>.<mode>[.estimate]('<graph>', { <properties> })``.
    """
    # Use a fresh dict per call: a shared ``{}`` default would carry any
    # mutation made by the helper over into later calls.
    if properties is None:
        properties = {}
    properties_as_string = helper.convert_dict_to_string(properties)

    # Both variants share the same statement; only the procedure name differs.
    procedure = "gds.{algorithm_name}.{execution_mode}".format(
        algorithm_name=algorithm_name,
        execution_mode=execution_mode)
    if estimate:
        procedure += ".estimate"

    return "CALL {procedure}('{graph_name}', {{ {properties} }})".format(
        procedure=procedure,
        graph_name=graph_name,
        properties=properties_as_string)
    
def run_gds_algorithm(neo4j_connection, query):
    """Execute a GDS query, converting a Neo4j ``ClientError`` into a result.

    Args:
        neo4j_connection: Object exposing ``run_single_query(query)``.
        query: Cypher text to execute.

    Returns:
        The value returned by ``run_single_query``. If a ``ClientError`` is
        raised, it is printed and ``[{'error': <exception>}]`` is returned
        instead; other exception types propagate.
    """
    try:
        return neo4j_connection.run_single_query(query)
    except ClientError as client_error:
        # Best effort: report the failure and hand it back as a one-row result
        # rather than raising (presumably so one failing algorithm does not
        # abort the whole batch).
        print(client_error)
        return [{'error': client_error}]

In [None]:
# Catalog name of the GDS graph projection used by every cell below.
GRAPH_NAME = "twitch-graph"

In [None]:
# Create a dask.distributed Client configured for 6 workers with 6 threads
# each. NOTE(review): with no scheduler address given this presumably starts
# a local cluster that the later ``compute`` call runs on - confirm against
# the dask.distributed docs.
client = Client(n_workers=6, threads_per_worker=6)

In [None]:
# NOTE(review): the connection settings (including the password) are
# hard-coded here and repeated in other cells; consider loading them from
# environment variables or a config file.
neo4j_connection = gs.graph_driver(uri_scheme='bolt', host='localhost', port='8687', username='neo4j', password='vaibhav123')

# Reuse the graph projection when the catalog already has it; otherwise
# create it. Either way, show the first row of the answer as a plain dict.
existing_graphs = check_if_graph_exists(neo4j_connection, GRAPH_NAME)
if existing_graphs[0]['exists']:
    r = get_graph_information(neo4j_connection, GRAPH_NAME)
else:
    r = create_graph(neo4j_connection, GRAPH_NAME)
res = dict(r[0].items())
pprint(res)


In [None]:
def run_algorithms(graph_name, algorithm, properties, algorithm_class):
    """Run one GDS algorithm in ``write`` mode and report its results and timing.

    A new connection is opened on every call (presumably so the function can
    execute inside a dask worker). NOTE(review): the connection settings,
    including the password, are hard-coded.

    Args:
        graph_name: Catalog name of the graph to run against.
        algorithm: Algorithm name passed to ``get_gds_query``.
        properties: Algorithm configuration passed to ``get_gds_query``.
        algorithm_class: Label for the algorithm's category; only echoed back.

    Returns:
        A dict with keys ``results`` (list of row dicts), ``time`` (seconds
        spent building the query, running it and converting the rows; opening
        the connection is not included), ``query``, ``algorithm``,
        ``algorithm_class`` and ``properties``.
    """
    connection = gs.graph_driver(uri_scheme='bolt', host='localhost', port='8687', username='neo4j', password='vaibhav123')
    started_at = time.time()

    query = get_gds_query(graph_name, algorithm, 'write', properties)
    records = run_gds_algorithm(connection, query)
    rows = [dict(record.items()) for record in records]

    return {
        'results': rows,
        'time': time.time() - started_at,
        'query': query,
        'algorithm': algorithm,
        'algorithm_class': algorithm_class,
        'properties': properties,
    }

def estimate_algorithms(graph_name, algorithm, properties, algorithm_class):
    """Run the ``.estimate`` variant of a ``write``-mode GDS algorithm and time it.

    A new connection is opened on every call. NOTE(review): the connection
    settings, including the password, are hard-coded.

    Args:
        graph_name: Catalog name of the graph to estimate against.
        algorithm: Algorithm name passed to ``get_gds_query``.
        properties: Algorithm configuration passed to ``get_gds_query``.
        algorithm_class: Label for the algorithm's category; only echoed back.

    Returns:
        A dict with keys ``results`` (the first result row as a dict),
        ``time`` (seconds spent building and running the query; opening the
        connection is not included), ``query``, ``algorithm``,
        ``algorithm_class`` and ``properties``.
    """
    connection = gs.graph_driver(uri_scheme='bolt', host='localhost', port='8687', username='neo4j', password='vaibhav123')
    started_at = time.time()

    query = str(get_gds_query(graph_name, algorithm, 'write', properties, True))
    # Only the first row of the answer is kept.
    first_record = run_gds_algorithm(connection, query)[0]
    estimate = dict(first_record.items())

    return {
        'results': estimate,
        'time': time.time() - started_at,
        'query': query,
        'algorithm': algorithm,
        'algorithm_class': algorithm_class,
        'properties': properties,
    }


In [None]:
list_of_algorithms = helper.get_list_of_algorithms()

# Queue one lazy dask task per configured algorithm; nothing runs until the
# tasks are handed to ``compute``. To run the ``.estimate`` variant instead,
# schedule ``estimate_algorithms`` in place of ``run_algorithms``.
res = []
for class_of_algorithm, algorithms_in_class in list_of_algorithms.items():
    for algorithm, props in algorithms_in_class.items():
        res.append(delayed(run_algorithms)(GRAPH_NAME, algorithm, props, class_of_algorithm))

In [None]:
# Show the queued dask Delayed objects; they have not been executed yet.
print(res)

In [None]:
%%time
# Execute every queued task. ``res`` is passed as one positional argument, so
# (per dask's ``compute`` docs) ``actual_res`` is a 1-tuple wrapping the list
# of per-algorithm result dicts.
actual_res = compute(res)

In [None]:
# Pretty-print the computed results of all algorithm runs.
pprint(actual_res)

In [None]:
def stringify_algos(list_of_algorithms):
    """Print the algorithm configuration tree and collect write-property names.

    Output is indented one tab per level: class, then algorithm, then one
    ``key:value`` line per configuration entry.

    Args:
        list_of_algorithms: Mapping of algorithm class -> algorithm name ->
            configuration mapping. Every configuration must contain a
            ``'writeProperty'`` entry (a ``KeyError`` is raised otherwise).

    Returns:
        A list with one ``"n.<writeProperty>"`` string per algorithm, in
        iteration order.
    """
    writeProps = []
    for class_name, algorithms in list_of_algorithms.items():
        print(class_name)
        for algorithm_name, settings in algorithms.items():
            print("\t" + algorithm_name)
            for key, value in settings.items():
                print("\t\t" + key + ":" + str(value))
            writeProps.append("n." + settings['writeProperty'])
    return writeProps

# Print the configured algorithms and collect their "n.<writeProperty>"
# expressions, which the cleanup query below joins into a REMOVE clause.
writeProps=stringify_algos(list_of_algorithms)

In [None]:
# Clean-up: remove every property listed in ``writeProps`` from the
# TWITCH_USER nodes.
# NOTE(review): an empty ``writeProps`` yields a dangling ``REMOVE`` clause,
# and the connection settings (including the password) are hard-coded.
neo4j_connection = gs.graph_driver(uri_scheme='bolt', host='localhost', port='8687', username='neo4j', password='vaibhav123')
query = "MATCH (n:TWITCH_USER) REMOVE " + ", ".join(writeProps)
res = neo4j_connection.run_single_query(query)
print(res)