In [1]:
import os
import sys

import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

# Make the parent directory importable so the project-local `utils` module resolves.
sys.path.append('../')

from utils import Graph

# Connection settings are read from the environment so credentials do not have to
# live in the notebook. The fallbacks are the values this notebook used before,
# so behaviour is unchanged when no variables are set.
NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
NEO4J_USERNAME = os.environ.get('NEO4J_USERNAME', 'neo4j')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD', 'neo4jneo4j')

# Handle used by every later cell to run Cypher (via `graph.query_run`).
graph = Graph(NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD)

In [2]:
# Cypher: create a GDS link-prediction training pipeline registered under the name 'pipe'.
query1 = """CALL gds.beta.pipeline.linkPrediction.create('pipe')"""

# Cypher: add a fastRP node-property step to 'pipe'. The step stores its result
# under the node property 'embedding' (dimension 256) and uses a fixed randomSeed of 42.
query2 = """CALL gds.beta.pipeline.linkPrediction.addNodeProperty('pipe', 'fastRP', {
  mutateProperty: 'embedding',
  embeddingDimension: 256,
  randomSeed: 42
})"""

# Cypher: add a 'hadamard' link-feature step to 'pipe', computed from the
# 'embedding' node property; yields the pipeline's featureSteps.
query3 = """CALL gds.beta.pipeline.linkPrediction.addFeature('pipe', 'hadamard', {
  nodeProperties: ['embedding']
}) YIELD featureSteps"""

# Cypher: configure the relationship split of 'pipe' (testFraction 0.25,
# trainFraction 0.6, 3 validation folds); yields the resulting splitConfig.
query4 = """CALL gds.beta.pipeline.linkPrediction.configureSplit('pipe', {
  testFraction: 0.25,
  trainFraction: 0.6,
  validationFolds: 3
})
YIELD splitConfig"""

# Cypher: add a logistic-regression model candidate (maxEpochs 500, penalty 0.01)
# to 'pipe' and return the parameter space per model family.
# Lines inside the literal carry no trailing padding; whitespace at the end of a
# line is not significant to Cypher.
query5 = """
CALL gds.beta.pipeline.linkPrediction.addLogisticRegression('pipe', {maxEpochs: 500, penalty: 0.01})
YIELD parameterSpace
RETURN parameterSpace.RandomForest AS randomForestSpace, parameterSpace.LogisticRegression AS logisticRegressionSpace, parameterSpace.MultilayerPerceptron AS MultilayerPerceptronSpace
"""

# Cypher: set the auto-tuning budget of 'pipe' to maxTrials 2; yields autoTuningConfig.
# NOTE(review): the cell that runs the queries has the call for query6 commented out,
# so this configuration is defined but not applied.
query6 = """CALL gds.beta.pipeline.linkPrediction.configureAutoTuning('pipe', {
  maxTrials: 2
}) YIELD autoTuningConfig"""


# Cypher: project a graph named 'myGraph' from `Company` nodes and `SUPPLIES_TO`
# relationships, with the relationships projected as UNDIRECTED.
query7 = """ 
CALL gds.graph.project(
  'myGraph',
  'Company',
  {
    SUPPLIES_TO: {
      orientation: 'UNDIRECTED'
    }
  }
)
"""

# Cypher: estimate the memory needed to train pipeline 'pipe' on 'myGraph'
# (same pipeline, modelName and targetRelationshipType as the training query);
# yields requiredMemory.
query8 = """
CALL gds.beta.pipeline.linkPrediction.train.estimate('myGraph', 
  {
    pipeline: 'pipe', 
    modelName: 'lp-pipeline-model',
    targetRelationshipType: 'SUPPLIES_TO' 
  }
)
YIELD requiredMemory
"""

# Cypher: train pipeline 'pipe' on 'myGraph' for the `SUPPLIES_TO` relationship type,
# storing the result as model 'lp-pipeline-model' (randomSeed 12).
# Returns the winning parameters plus AUCPR scores: average train score,
# outer-train score, test score, and the average validation score of each candidate.
query9 = """CALL gds.beta.pipeline.linkPrediction.train('myGraph', 
{
  pipeline: 'pipe',
  modelName: 'lp-pipeline-model',
  randomSeed: 12,
  targetRelationshipType: 'SUPPLIES_TO' 
  }) 
YIELD modelInfo, modelSelectionStats
RETURN
  modelInfo.bestParameters AS winningModel,
  modelInfo.metrics.AUCPR.train.avg AS avgTrainScore,
  modelInfo.metrics.AUCPR.outerTrain AS outerTrainScore,
  modelInfo.metrics.AUCPR.test AS testScore,
  [cand IN modelSelectionStats.modelCandidates | cand.metrics.AUCPR.validation.avg] AS validationScores"""



In [3]:
# Run the pipeline setup, graph projection, memory estimate and training queries
# in order, each with an empty parameter map.
# query6 (auto-tuning) is not part of the sequence; its call was commented out.
for cypher in (query1, query2, query3, query4, query5, query7, query8, query9):
    graph.query_run(cypher, {})


[{'name': 'pipe', 'nodePropertySteps': [], 'featureSteps': [], 'splitConfig': {'negativeSamplingRatio': 1.0, 'testFraction': 0.1, 'validationFolds': 3, 'trainFraction': 0.1}, 'autoTuningConfig': {'maxTrials': 10}, 'parameterSpace': {'MultilayerPerceptron': [], 'RandomForest': [], 'LogisticRegression': []}}]
[{'name': 'pipe', 'nodePropertySteps': [{'name': 'gds.fastRP.mutate', 'config': {'randomSeed': 42, 'contextRelationshipTypes': [], 'embeddingDimension': 256, 'contextNodeLabels': [], 'mutateProperty': 'embedding'}}], 'featureSteps': [], 'splitConfig': {'negativeSamplingRatio': 1.0, 'testFraction': 0.1, 'validationFolds': 3, 'trainFraction': 0.1}, 'autoTuningConfig': {'maxTrials': 10}, 'parameterSpace': {'MultilayerPerceptron': [], 'RandomForest': [], 'LogisticRegression': []}}]
[{'featureSteps': [{'name': 'HADAMARD', 'config': {'nodeProperties': ['embedding']}}]}]
[{'splitConfig': {'negativeSamplingRatio': 1.0, 'testFraction': 0.25, 'validationFolds': 3, 'trainFraction': 0.6}}]
[{'r

In [4]:
# Teardown: remove what this notebook registered in GDS so the notebook can be
# run again against the same database.

# Cypher: drop the training pipeline 'pipe'.
query10 = """ 
CALL gds.beta.pipeline.drop('pipe')
YIELD pipelineName, pipelineType
"""

# Cypher: drop the projected graph 'myGraph'.
query11 = """
CALL gds.graph.drop('myGraph') YIELD graphName;
"""

# Cypher: drop the trained model. Training stores 'lp-pipeline-model' in the model
# catalog, and training again under a name that already exists fails, so the model
# has to go as well. The second argument `false` means "do not fail if missing".
# NOTE(review): assumes the installed GDS version exposes `gds.beta.model.drop`
# (same beta namespace as the pipeline procedures used above) — confirm.
query12 = """
CALL gds.beta.model.drop('lp-pipeline-model', false)
YIELD modelInfo
RETURN modelInfo.modelName AS modelName
"""

graph.query_run(query10,{})
graph.query_run(query11,{})
graph.query_run(query12,{})

[{'pipelineName': 'pipe', 'pipelineType': 'Link prediction training pipeline'}]
[{'graphName': 'myGraph'}]
