In [1]:
import os
import sys

import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

# Make the parent directory importable so the project-local `utils` module resolves.
sys.path.append('../')

from utils import Graph

# Connection settings. Each value can be overridden through an environment
# variable so real credentials never have to be committed with the notebook;
# the fallbacks are the local development values this notebook used before.
NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
NEO4J_USER = os.environ.get('NEO4J_USER', 'neo4j')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD', 'neo4jneo4j')

# Shared handle used by every later cell to send Cypher to the database.
graph = Graph(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

In [2]:
# Reset the GDS catalogs so the notebook starts from a clean state on every
# "Restart & Run All". Each query lists one catalog and drops every entry.

# Trained models. Without this, a model left behind by an interrupted run makes
# the next training call with the same model name fail.
query_model_drop_all = """ 
CALL gds.beta.model.list() YIELD modelInfo
WITH modelInfo.modelName AS t
CALL gds.beta.model.drop(t) YIELD modelInfo AS droppedModel
RETURN droppedModel
"""

# Projected in-memory graphs.
query_graph_drop_all = """ 
CALL gds.graph.list() YIELD graphName
UNWIND graphName as t
CALL gds.graph.drop(t) YIELD schema
RETURN schema
"""

# Training pipelines.
query_pipeline_drop_all = """ 
CALL gds.beta.pipeline.list() YIELD pipelineName
UNWIND pipelineName as t
CALL gds.beta.pipeline.drop(t) YIELD pipelineInfo
RETURN pipelineInfo
"""

graph.query_run(query_model_drop_all, {})
graph.query_run(query_pipeline_drop_all, {})
graph.query_run(query_graph_drop_all, {})

[]
[]


In [3]:
def run_link_prediction_pipeline(pipe_name, lp_pipeline_model, model, graph_name='myGraph'):
    """Build, train and tear down one GDS link-prediction pipeline.

    The function sends a fixed sequence of Cypher procedure calls through the
    notebook-level ``graph`` handle: create the pipeline, add a fastRP node
    property and a hadamard link feature, configure the split, add one model
    candidate, project the ``Company`` / ``SUPPLIES_TO`` graph, estimate and
    run training, then drop the pipeline, the projected graph and the model.

    Everything that was successfully created is dropped again even when a
    later step raises, so a failed run does not leave a projected graph or a
    pipeline behind that would break the next call.

    Args:
        pipe_name: Name of the training pipeline to create (and drop).
        lp_pipeline_model: Name under which the trained model is stored
            (and dropped again afterwards).
        model: ``"LogisticRegression"``, ``"RandomForest"`` or
            ``"MultilayerPerceptron"``. Any other value still selects the MLP
            candidate, as before, but now emits a warning.
        graph_name: Name of the in-memory graph projection. Defaults to
            ``'myGraph'``, the name that was previously hard-coded.

    Returns:
        None. Query results are not returned by this function.

    Note:
        The names are interpolated directly into the Cypher text, so only
        pass trusted values.
    """
    # Function-scope import keeps this cell runnable without touching the import cell.
    import warnings

    if model not in ("LogisticRegression", "RandomForest", "MultilayerPerceptron"):
        # The final branch below is a catch-all; make a misspelled name visible
        # instead of silently training a different model type.
        warnings.warn(
            f"Unknown model {model!r}; falling back to the MultilayerPerceptron candidate.",
            stacklevel=2,
        )

    create_pipeline = f"""CALL gds.beta.pipeline.linkPrediction.create('{pipe_name}')"""

    add_node_property = f"""CALL gds.beta.pipeline.linkPrediction.addNodeProperty('{pipe_name}', 'fastRP', {{
      mutateProperty: 'embedding',
      embeddingDimension: 256,
      randomSeed: 42
    }})"""

    add_link_feature = f"""CALL gds.beta.pipeline.linkPrediction.addFeature('{pipe_name}', 'hadamard', {{
      nodeProperties: ['embedding']
    }}) YIELD featureSteps"""

    configure_split = f"""CALL gds.beta.pipeline.linkPrediction.configureSplit('{pipe_name}', {{
      testFraction: 0.25,
      trainFraction: 0.6,
      validationFolds: 3
    }})
    YIELD splitConfig"""

    # Identical RETURN clause shared by all three model-candidate queries.
    parameter_space_return = (
        "RETURN parameterSpace.RandomForest AS randomForestSpace, "
        "parameterSpace.LogisticRegression AS logisticRegressionSpace, "
        "parameterSpace.MultilayerPerceptron AS MultilayerPerceptronSpace"
    )

    if model == "LogisticRegression":
        add_model_candidate = f"""
        CALL gds.beta.pipeline.linkPrediction.addLogisticRegression('{pipe_name}', {{maxEpochs: 500, penalty: 0.01}})
        YIELD parameterSpace
        {parameter_space_return}
        """
    elif model == "RandomForest":
        add_model_candidate = f"""
        CALL gds.beta.pipeline.linkPrediction.addRandomForest('{pipe_name}', {{maxDepth: 10, numberOfDecisionTrees: 10}})
        YIELD parameterSpace
        {parameter_space_return}
        """
    else:
        add_model_candidate = f"""
        CALL gds.alpha.pipeline.linkPrediction.addMLP('{pipe_name}', {{
          hiddenLayerSizes : [4, 2], 
          penalty: 0.5, 
          patience: 2, 
          classWeights: [0.55, 0.45], 
          focusWeight: 0.1
        }}) YIELD parameterSpace
        {parameter_space_return}
        """

    # The configureAutoTuning step was built but never executed before, so it
    # is omitted here; the pipeline keeps its default auto-tuning config.

    project_graph = f""" 
    CALL gds.graph.project(
      '{graph_name}',
      'Company',
      {{
        SUPPLIES_TO: {{
          orientation: 'UNDIRECTED'
        }}
      }}
    )
    """

    estimate_training = f"""
    CALL gds.beta.pipeline.linkPrediction.train.estimate('{graph_name}', 
      {{
        pipeline: '{pipe_name}', 
        modelName: '{lp_pipeline_model}',
        targetRelationshipType: 'SUPPLIES_TO' 
      }}
    )
    YIELD requiredMemory
    """

    train_model = f"""CALL gds.beta.pipeline.linkPrediction.train('{graph_name}', 
    {{
      pipeline: '{pipe_name}',
      modelName: '{lp_pipeline_model}',
      randomSeed: 12,
      targetRelationshipType: 'SUPPLIES_TO' 
      }}) 
    YIELD modelInfo, modelSelectionStats
    RETURN
      modelInfo.bestParameters AS winningModel,
      modelInfo.metrics.AUCPR.train.avg AS avgTrainScore,
      modelInfo.metrics.AUCPR.outerTrain AS outerTrainScore,
      modelInfo.metrics.AUCPR.test AS testScore,
      [cand IN modelSelectionStats.modelCandidates | cand.metrics.AUCPR.validation.avg] AS validationScores
    """

    drop_pipeline = f""" 
    CALL gds.beta.pipeline.drop('{pipe_name}')
    """

    drop_graph = f"""
    CALL gds.graph.drop('{graph_name}')
    """

    drop_model = f""" 
    CALL gds.beta.model.drop('{lp_pipeline_model}')
    """

    # A drop query is registered only once the matching resource exists, so the
    # cleanup never tries to drop something this call did not create. On a
    # fully successful run the order is pipeline, graph, model (as before).
    pending_cleanup = []
    try:
        graph.query_run(create_pipeline, {})
        pending_cleanup.append(drop_pipeline)

        for configuration_step in (add_node_property, add_link_feature,
                                   configure_split, add_model_candidate):
            graph.query_run(configuration_step, {})

        graph.query_run(project_graph, {})
        pending_cleanup.append(drop_graph)

        graph.query_run(estimate_training, {})
        graph.query_run(train_model, {})
        pending_cleanup.append(drop_model)
    finally:
        # Attempt every registered drop even if one of them fails, then
        # surface the first cleanup failure so it is not lost.
        first_cleanup_error = None
        for drop_query in pending_cleanup:
            try:
                graph.query_run(drop_query, {})
            except Exception as cleanup_error:
                if first_cleanup_error is None:
                    first_cleanup_error = cleanup_error
        if first_cleanup_error is not None:
            raise first_cleanup_error




In [4]:
# Logistic-regression candidate: pipeline 'pipe1' is trained into model 'model1'.
run_link_prediction_pipeline(
    'pipe1',
    lp_pipeline_model='model1',
    model='LogisticRegression',
)

[{'name': 'pipe1', 'nodePropertySteps': [], 'featureSteps': [], 'splitConfig': {'negativeSamplingRatio': 1.0, 'testFraction': 0.1, 'validationFolds': 3, 'trainFraction': 0.1}, 'autoTuningConfig': {'maxTrials': 10}, 'parameterSpace': {'MultilayerPerceptron': [], 'RandomForest': [], 'LogisticRegression': []}}]
[{'name': 'pipe1', 'nodePropertySteps': [{'name': 'gds.fastRP.mutate', 'config': {'randomSeed': 42, 'contextRelationshipTypes': [], 'embeddingDimension': 256, 'contextNodeLabels': [], 'mutateProperty': 'embedding'}}], 'featureSteps': [], 'splitConfig': {'negativeSamplingRatio': 1.0, 'testFraction': 0.1, 'validationFolds': 3, 'trainFraction': 0.1}, 'autoTuningConfig': {'maxTrials': 10}, 'parameterSpace': {'MultilayerPerceptron': [], 'RandomForest': [], 'LogisticRegression': []}}]
[{'featureSteps': [{'name': 'HADAMARD', 'config': {'nodeProperties': ['embedding']}}]}]
[{'splitConfig': {'negativeSamplingRatio': 1.0, 'testFraction': 0.25, 'validationFolds': 3, 'trainFraction': 0.6}}]
[{

In [5]:
# Random-forest candidate: pipeline 'pipe2' is trained into model 'model2'.
run_link_prediction_pipeline(
    'pipe2',
    lp_pipeline_model='model2',
    model='RandomForest',
)

[{'name': 'pipe2', 'nodePropertySteps': [], 'featureSteps': [], 'splitConfig': {'negativeSamplingRatio': 1.0, 'testFraction': 0.1, 'validationFolds': 3, 'trainFraction': 0.1}, 'autoTuningConfig': {'maxTrials': 10}, 'parameterSpace': {'MultilayerPerceptron': [], 'RandomForest': [], 'LogisticRegression': []}}]
[{'name': 'pipe2', 'nodePropertySteps': [{'name': 'gds.fastRP.mutate', 'config': {'randomSeed': 42, 'contextRelationshipTypes': [], 'embeddingDimension': 256, 'contextNodeLabels': [], 'mutateProperty': 'embedding'}}], 'featureSteps': [], 'splitConfig': {'negativeSamplingRatio': 1.0, 'testFraction': 0.1, 'validationFolds': 3, 'trainFraction': 0.1}, 'autoTuningConfig': {'maxTrials': 10}, 'parameterSpace': {'MultilayerPerceptron': [], 'RandomForest': [], 'LogisticRegression': []}}]
[{'featureSteps': [{'name': 'HADAMARD', 'config': {'nodeProperties': ['embedding']}}]}]
[{'splitConfig': {'negativeSamplingRatio': 1.0, 'testFraction': 0.25, 'validationFolds': 3, 'trainFraction': 0.6}}]
[{

In [6]:
# Multilayer-perceptron candidate: pipeline 'pipe3' is trained into model 'model3'.
run_link_prediction_pipeline(
    'pipe3',
    lp_pipeline_model='model3',
    model='MultilayerPerceptron',
)

[{'name': 'pipe3', 'nodePropertySteps': [], 'featureSteps': [], 'splitConfig': {'negativeSamplingRatio': 1.0, 'testFraction': 0.1, 'validationFolds': 3, 'trainFraction': 0.1}, 'autoTuningConfig': {'maxTrials': 10}, 'parameterSpace': {'MultilayerPerceptron': [], 'RandomForest': [], 'LogisticRegression': []}}]
[{'name': 'pipe3', 'nodePropertySteps': [{'name': 'gds.fastRP.mutate', 'config': {'randomSeed': 42, 'contextRelationshipTypes': [], 'embeddingDimension': 256, 'contextNodeLabels': [], 'mutateProperty': 'embedding'}}], 'featureSteps': [], 'splitConfig': {'negativeSamplingRatio': 1.0, 'testFraction': 0.1, 'validationFolds': 3, 'trainFraction': 0.1}, 'autoTuningConfig': {'maxTrials': 10}, 'parameterSpace': {'MultilayerPerceptron': [], 'RandomForest': [], 'LogisticRegression': []}}]
[{'featureSteps': [{'name': 'HADAMARD', 'config': {'nodeProperties': ['embedding']}}]}]
[{'splitConfig': {'negativeSamplingRatio': 1.0, 'testFraction': 0.25, 'validationFolds': 3, 'trainFraction': 0.6}}]
[{

In [7]:
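# Manual cleanup, kept for reference: uncomment to drop a leftover model, pipeline or projected graph.
# query12 = """ 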
# CALL gds.beta.model.drop('model2')
# """

# query10 = """ 
# CALL gds.beta.pipeline.drop('pipe3')
# YIELD pipelineName, pipelineType
# """

# query11 = """
# CALL gds.graph.drop('myGraph') YIELD graphName;
# """

# # graph.query_run(query12,{})
# graph.query_run(query10,{})
# graph.query_run(query11,{})