In [1]:
import matplotlib.pyplot as plt
from math import isclose
from sklearn.decomposition import PCA
import os
import networkx as nx
import numpy as np
import pandas as pd
from stellargraph import StellarGraph, datasets
from stellargraph.data import EdgeSplitter
from collections import Counter
import multiprocessing
from IPython.display import display, HTML
from sklearn.model_selection import train_test_split
from py2neo import Graph


%matplotlib inline

In [2]:
# Connection settings are read from the environment so that credentials do not
# have to be committed with the notebook. The fallbacks are the original
# local-development values, so an unconfigured machine behaves as before.
# NOTE(review): set NEO4J_PASSWORD outside the notebook for any shared database.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "neo4jneo4j")

graph = Graph(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

## StellarGraph

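StellarGraph cannot be built from this data: it requires unique node IDs, and neither the `id` nor the `label` property of the `Person` nodes is unique (see the `ValueError` below).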

In [3]:
# Pull every Person node's label together with its trianglesTrain1 value.
query = """
MATCH (p:Person)
RETURN p.label as label, p.trianglesTrain1 as triangles
"""

node_rows = graph.run(query)
nodes = node_rows.to_data_frame()

In [4]:
# Pull every INTERACTS_1 relationship between two Person nodes as a
# (source, target, weight) row.
# NOTE(review): source/target are p.id values, whereas the node table is built
# from p.label -- confirm the two identifier schemes actually line up.
query = """
MATCH (p:Person)-[r:INTERACTS_1]->(p2:Person)
RETURN p.id as source, p2.id as target, r.weight as weight
"""

edge_rows = graph.run(query)
weighted_edges = edge_rows.to_data_frame()

In [5]:
# Use the label as the node index. The guard makes the cell safe to re-run:
# once 'label' has become the index it is no longer a column, and calling
# set_index again would raise KeyError.
if 'label' in nodes.columns:
    nodes = nodes.set_index('label')

In [6]:
# Show the node table (indexed by label, one 'triangles' column).
nodes

Unnamed: 0_level_0,triangles
label,Unnamed: 1_level_1
Robett,0
Smalljon,0
Edmures Son,0
Rooses Son,0
Talla,0
...,...
Unsullied Captain,0
Vale Lord,0
Vicky,0
Willa,0


In [7]:
# Show the edge table (source, target, weight).
weighted_edges

Unnamed: 0,source,target,weight
0,ADDAM_MARBRAND,TYWIN,2
1,ADDAM_MARBRAND,LEO_LEFFORD,3
2,ADDAM_MARBRAND,KEVAN,3
3,AEGON,MAESTER_AEMON,2
4,AEGON,AERYS,2
...,...,...,...
544,TYRION,TYSHA,2
545,TYRION,VARYS,2
546,VARDIS_EGEN,TYRION,9
547,VARYS,YOREN,3


In [8]:
# Attempt to build a weighted StellarGraph from the node and edge tables.
# The recorded run fails with a ValueError because some node IDs appear more
# than once. That failure is the point of this section, so it is caught and
# printed instead of aborting a Restart & Run All.
try:
    weighted_homogeneous = StellarGraph(nodes, weighted_edges)
    print(weighted_homogeneous.info())
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: expected IDs to appear once, found some that appeared more: 'Lommy', 'Qhorin', 'Wolkan'

## GraphSAGE embedding

In [3]:
# Make the in-memory graphs for adding triangle counts and clustering coefficients.
# One undirected projection of the Person nodes is created per relationship
# type: myGraph<n> is built from INTERACTS_<n>, for n = 1..5.
PROJECT_QUERY = """
CALL gds.graph.project(
  'myGraph{n}',
  'Person',
  {{
    INTERACTS_{n}: {{
      orientation: 'UNDIRECTED'
    }}
  }}
)
"""

for n in range(1, 6):
    # The graph catalog lives on the server and survives kernel restarts, so a
    # plain re-run would fail on the already-existing name. Dropping with
    # failIfMissing=false is a no-op when the graph does not exist yet.
    graph.run(f"CALL gds.graph.drop('myGraph{n}', false)")
    last_projection = graph.run(PROJECT_QUERY.format(n=n))

# Display the summary of the final projection (myGraph5).
last_projection


nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
"{Person: {label: 'Person', properties: {}}}","{INTERACTS_5: {orientation: 'UNDIRECTED', indexInverse: false, aggregation: 'DEFAULT', type: 'INTERACTS_5', properties: {}}}",myGraph5,418,866,21


In [4]:
# Make the in-memory graphs for adding triangle counts and clustering coefficients.
# One undirected projection of the Person nodes is created per relationship
# type: myGraph<n> is built from INTERACTS_<n>, for n = 6..8.
PROJECT_QUERY = """
CALL gds.graph.project(
  'myGraph{n}',
  'Person',
  {{
    INTERACTS_{n}: {{
      orientation: 'UNDIRECTED'
    }}
  }}
)
"""

for n in range(6, 9):
    # The graph catalog lives on the server and survives kernel restarts, so a
    # plain re-run would fail on the already-existing name. Dropping with
    # failIfMissing=false is a no-op when the graph does not exist yet.
    graph.run(f"CALL gds.graph.drop('myGraph{n}', false)")
    last_projection = graph.run(PROJECT_QUERY.format(n=n))

# Display the summary of the final projection (myGraph8).
last_projection


nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
"{Person: {label: 'Person', properties: {}}}","{INTERACTS_8: {orientation: 'UNDIRECTED', indexInverse: false, aggregation: 'DEFAULT', type: 'INTERACTS_8', properties: {}}}",myGraph8,418,1200,21


In [6]:
# Compute node degree on the in-memory graph 'myGraph1' and store it there
# under the property name 'degree' (used as the GraphSAGE feature below).
query = """ 
CALL gds.degree.mutate(
  'myGraph1',
  {
    mutateProperty: 'degree'
  }
) 
YIELD nodePropertiesWritten
"""

degree_summary = graph.run(query)
degree_summary

nodePropertiesWritten
418


In [28]:
# Train a GraphSAGE model named 'myModel3' on 'myGraph1', using the mutated
# 'degree' property as the only feature and a fixed random seed.
# NOTE(review): the recorded run failed because the model catalog already held
# the maximum of 3 models allowed without a licence. An unused model has to be
# removed from the catalog before this cell can succeed.
query = """ 
CALL gds.beta.graphSage.train(
  'myGraph1',
  {
    modelName: 'myModel3',
    featureProperties: ['degree'],
    embeddingDimension: 2,
    randomSeed: 19
  }
)
YIELD trainMillis
RETURN trainMillis
"""

train_summary = graph.run(query)
train_summary

ClientError: [Procedure.ProcedureCallFailed] Failed to invoke procedure `gds.beta.graphSage.train`: Caused by: java.lang.IllegalStateException: Storing more than `3` models in the catalog is available with a licensed Graph Data Science library. See documentation at https://neo4j.com/docs/graph-data-science/

In [23]:
# Stream the embeddings that the stored model 'myModel2' produces for
# 'myGraph1', one (nodeId, embedding) row per node.
# NOTE(review): this reads 'myModel2', while the training cell in this notebook
# names its model 'myModel3' -- confirm which model is intended.
query = """ 
CALL gds.beta.graphSage.stream(
  'myGraph1',
  {
    modelName: 'myModel2'
  }
)
YIELD nodeId, embedding
"""

embedding_rows = graph.run(query)
a = embedding_rows.to_data_frame()

In [29]:
# Embedding vector of the row labelled 1 in the streamed result.
a.loc[1, 'embedding']

[0.48163160226516744,
 0.4435736385129853,
 0.29744173078652314,
 0.31327502994241757,
 0.38343095593054577,
 0.4874846675119471]

In [30]:
# Embedding vector of the row labelled 2 in the streamed result.
# NOTE(review): the recorded output is identical to that of row 1 -- worth
# checking whether the embeddings distinguish nodes at all.
a.loc[2, 'embedding']


[0.48163160226516744,
 0.4435736385129853,
 0.29744173078652314,
 0.31327502994241757,
 0.38343095593054577,
 0.4874846675119471]