In [1]:
from graphdatascience import GraphDataScience
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.set_option('display.width', 0)
pd.set_option('display.max_colwidth', 500)
pd.set_option('display.max_rows', 12)
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget

In [2]:
uri = "bolt://localhost:7687" 
user = "neo4j"  
password = "12345678"
driver = GraphDatabase.driver(uri=uri,auth=(user,password),database='neo4j')
session = driver.session(database='neo4j')

### Visualize the graph

In [3]:

result = session.run("CALL db.schema.visualization()")
w = GraphWidget(graph = result.graph())
w.show()

GraphWidget(layout=Layout(height='500px', width='100%'))

In [9]:
session.run('''
CALL gds.graph.project(
  'ratings',
  ['User','Movie'],
  {
    RATED: {
      orientation: 'UNDIRECTED',
      properties: 'rating'
    }
  }
)
''')

<neo4j._sync.work.result.Result at 0x187bd874c40>

In [13]:
session.run('''
CALL gds.fastRP.mutate('ratings',
  {
    embeddingDimension: 256,
    randomSeed: 7474,
    mutateProperty: 'embedding',
    relationshipWeightProperty: 'rating',
    iterationWeights: [0.8, 1, 1, 1]
  }
)
YIELD nodePropertiesWritten
''')

<neo4j._sync.work.result.Result at 0x187bde07880>

In [16]:
result = session.run('''
    CALL gds.knn.write('ratings', {
    topK: 5,
    nodeProperties: ['embedding'],
    randomSeed: 42,
    concurrency: 1,
    sampleRate: 1.0,
    deltaThreshold: 0.0,
    writeRelationshipType: "SIMILAR",
    writeProperty: "score"
})
YIELD nodesCompared, relationshipsWritten, similarityDistribution
RETURN nodesCompared, relationshipsWritten, similarityDistribution.mean as meanSimilarity
''').to_df()
print(result)

[{'nodesCompared': 20094, 'relationshipsWritten': 100470, 'meanSimilarity': 0.971254386761372}]


In [4]:
similar_user = session.run('''
    MATCH (n:User)-[r:SIMILAR]->(m:User)
    RETURN n.id as person1, m.id as person2, r.score as similarity
    ORDER BY similarity DESCENDING, person1, person2 limit 100
''').to_df()
print(similar_user)

    person1  person2  similarity
0   user126  user379    0.989916
1   user126  user379    0.989916
2   user379  user126    0.989916
3   user379  user126    0.989916
4   user130  user574    0.987372
..      ...      ...         ...
95  user584  user321    0.980616
96  user569  user126    0.980614
97  user569  user126    0.980614
98  user145   user46    0.980519
99  user145   user46    0.980519

[100 rows x 3 columns]


### Recommendation

In [5]:
user_recommendation = session.run('''
    MATCH (u:User {id: 'user126'})
    MATCH (u)-[:SIMILAR]->(similarUser)-[:RATED]->(m:Movie)
    WHERE NOT EXISTS((u)-[:RATED]->(m))
    RETURN u.id AS userId, m.title AS movieTitle, COLLECT(similarUser.id) AS similarUserIds, u, m, similarUser
    LIMIT 10
''')
print(user_recommendation.to_df())

w = GraphWidget(graph = user_recommendation.graph())
w.show()

    userId                      movieTitle      similarUserIds     u  \
0  user126                    Pretty Woman  [user379, user379]  (id)   
1  user126                    Pretty Woman  [user379, user379]  (id)   
2  user126                           Ghost  [user379, user379]  (id)   
3  user126                           Ghost  [user379, user379]  (id)   
4  user126            Sleepless in Seattle  [user379, user379]  (id)   
5  user126            Sleepless in Seattle  [user379, user379]  (id)   
6  user126                Schindler's List  [user379, user379]  (id)   
7  user126                Schindler's List  [user379, user379]  (id)   
8  user126  Ace Ventura: When Nature Calls  [user379, user379]  (id)   
9  user126  Ace Ventura: When Nature Calls  [user379, user379]  (id)   

                                   m similarUser  
0  (tmdbId, year, imdbId, id, title)        (id)  
1  (tmdbId, year, imdbId, id, title)        (id)  
2  (tmdbId, year, imdbId, id, title)        (id)  
3  

GraphWidget(layout=Layout(height='500px', width='100%'))