In [2]:
from graphdatascience import GraphDataScience
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget

In [3]:
uri = "bolt://localhost:7687" 
user = "neo4j"  
password = "12345678"
driver = GraphDatabase.driver(uri=uri,auth=(user,password),database='neo4j')
session = driver.session(database='neo4j')

### Visualize the graph

In [4]:

result = session.run("CALL db.schema.visualization()")
w = GraphWidget(graph = result.graph())
w.show()

GraphWidget(layout=Layout(height='500px', width='100%'))

### 1: Graph creation

In [5]:
session.run('''
CALL gds.graph.project(
  'ratings',
  ['User','Movie'],
  {
    RATED: {
      orientation: 'UNDIRECTED',
      properties: 'rating'
    }
  }
)
''')

<neo4j._sync.work.result.Result at 0x163dcc497e0>

### 2: Node embedding

In [6]:
session.run('''
CALL gds.fastRP.mutate('ratings',
  {
    embeddingDimension: 512,
    randomSeed: 7474,
    mutateProperty: 'embedding',
    relationshipWeightProperty: 'rating',
    iterationWeights: [0.8, 1, 1, 1]
  }
)
YIELD nodePropertiesWritten
return nodePropertiesWritten
''')

<neo4j._sync.work.result.Result at 0x163f1765840>

### 3: Node similarity with KNN

In [7]:
result = session.run('''
    CALL gds.knn.write('ratings', {
    topK: 5,
    nodeProperties: ['embedding'],
    randomSeed: 42,
    concurrency: 1,
    sampleRate: 1.0,
    deltaThreshold: 0.0,
    writeRelationshipType: "SIMILAR",
    writeProperty: "score"
})
YIELD nodesCompared, relationshipsWritten, similarityDistribution
RETURN nodesCompared, relationshipsWritten, similarityDistribution.mean as meanSimilarity
''').to_df()
print(result)

   nodesCompared  relationshipsWritten  meanSimilarity
0          10352                 51760        0.964479


### 4: Explore the graph. Print similar users

In [8]:
similar_user = session.run('''
    MATCH (n:User)-[r:SIMILAR]->(m:User)
    RETURN n.id as person1, m.id as person2, r.score as similarity
    ORDER BY similarity DESCENDING, person1, person2 limit 100
''').to_df()
print(similar_user)

    person1  person2  similarity
0   user126  user379    0.989789
1   user126  user379    0.989789
2   user379  user126    0.989789
3   user379  user126    0.989789
4   user130  user574    0.987911
..      ...      ...         ...
95  user340  user379    0.980332
96  user340   user81    0.980311
97  user340   user81    0.980311
98   user81  user340    0.980311
99   user81  user340    0.980311

[100 rows x 3 columns]


### 5: Make moive recommendations by user KNN

In [9]:
query = '''
MATCH (u:User {id: 'user379'})
MATCH (u)-[r1:SIMILAR]->(similarUser)-[r2:RATED]->(m:Movie)
WHERE NOT EXISTS((u)-[:RATED]->(m))
RETURN u.id AS userId, m.title AS movieTitle, m.id AS movieId
LIMIT 10
'''
result = session.run(query)
print(result.to_df())

    userId                       movieTitle movieId
0  user379             Beauty and the Beast     595
1  user379        Silence of the Lambs, The     593
2  user379              Legends of the Fall     266
3  user379                         Clueless      39
4  user379             Beauty and the Beast     595
5  user379        Silence of the Lambs, The     593
6  user379              Legends of the Fall     266
7  user379                         Clueless      39
8  user379  Dumb & Dumber (Dumb and Dumber)     231
9  user379             Natural Born Killers     288


#### visualize the graph

In [10]:
query = '''
MATCH (u:User {id: 'user5'})
MATCH (u)-[r1:SIMILAR]->(similarUser)-[r2:RATED]->(m:Movie)
WHERE NOT EXISTS((u)-[:RATED]->(m))
RETURN u, similarUser, m, r1, r2
LIMIT 10
'''
result = session.run(query)
widget = GraphWidget(graph = result.graph())
widget.show()

GraphWidget(layout=Layout(height='500px', width='100%'))

### 6: Recommendation by movie KNN

In [11]:
query = '''
MATCH (targetMovie:Movie {id: '68954'})
MATCH (targetMovie)-[similarity:SIMILAR]-(similarMovie:Movie)
WHERE similarMovie.id <> targetMovie.id
RETURN DISTINCT similarMovie.id AS MovieId, similarMovie.title AS movieTitle , similarity.score AS score
ORDER BY similarity.score DESC
LIMIT 10
'''
result = session.run(query)
print(result.to_df())

  MovieId                            movieTitle     score
0   60069                                WALL·E  0.976485
1   72998                                Avatar  0.974298
2   59315                              Iron Man  0.969230
3   58559                      Dark Knight, The  0.967764
4   79132                             Inception  0.967023
5   76093              How to Train Your Dragon  0.964197
6   81845                    King's Speech, The  0.962040
7   64957  Curious Case of Benjamin Button, The  0.956786
8   92259                          Intouchables  0.942228
9   96821      Perks of Being a Wallflower, The  0.941262


#### Visualize the graph

In [12]:
query = '''
MATCH (targetMovie:Movie {id: '68954'})
MATCH (targetMovie)-[similarity:SIMILAR]-(similarMovie:Movie)
WHERE similarMovie.id <> targetMovie.id
RETURN DISTINCT targetMovie, similarMovie, similarity
ORDER BY similarity.score DESC
LIMIT 12
'''
result = session.run(query)
widget = GraphWidget(graph = result.graph())
widget.show()

GraphWidget(layout=Layout(height='500px', width='100%'))

#### 7: Recommendation by KNN + PageRank Algorithm

In [13]:
query = '''
CALL gds.graph.project(
  	'pagerank_collab',
  	['Movie', 'Movie'],
  	'SIMILAR',
  {
   	 relationshipProperties: {
      	score: {
        	property: 'score',
        	defaultValue: 0.0
      }
    }
  }
);
'''
result = session.run(query)


In [14]:
query = '''
    MATCH (src1:Movie {id: $movieid})
    CALL gds.pageRank.stream('pagerank_collab', {
    maxIterations: 20,
    dampingFactor: 0.85,
    sourceNodes: [src1],
    relationshipTypes: ['SIMILAR'],
    relationshipWeightProperty: 'score'
    })
    YIELD nodeId, score
    WHERE gds.util.asNode(nodeId).id <> src1.id
    WITH gds.util.asNode(nodeId) AS movie, score
    RETURN movie.id AS movieId, movie.title AS movieTitle, score
    ORDER BY score DESC
    LIMIT $n;
'''
result = session.run(query, movieid='68954', n=10)
print(result.to_df())

  movieId              movieTitle     score
0   58559        Dark Knight, The  0.072987
1   59315                Iron Man  0.072758
2   79132               Inception  0.058580
3   72998                  Avatar  0.054995
4   60069                  WALL·E  0.040782
5   89745           Avengers, The  0.033818
6   91529  Dark Knight Rises, The  0.031183
7   77561              Iron Man 2  0.025844
8   99114        Django Unchained  0.024425
9   68157    Inglourious Basterds  0.021537


#### Delete SIMILAR relationship

In [15]:
# query = '''
#     MATCH ()-[r:SIMILAR]->()  
#     DELETE r   
# '''
# result = session.run(query)
# print(result.to_df())