## IMPORTING DATA INTO NEO4J


In [11]:
import csv
import os
import re

from py2neo import Graph, Node

In [12]:
# Upper bounds on how many CSV rows each loader processes.
# For a quick trial run, set all three to 1000 instead.
N_MOVIES = 9_742
N_RATINGS = 100_836
N_LINKS = 3_683

In [13]:
# Connection settings for the Neo4j instance, reached over the Bolt protocol.
# The original URI omitted the host and ignored PORT; both are now explicit.
# NOTE(review): the host-less URI presumably resolved to localhost -- confirm.
HOST = "localhost"
PORT = 7687
USER = "neo4j"
# Prefer supplying the password through the NEO4J_PASSWORD environment
# variable; the literal fallback keeps the existing local setup working.
PASS = os.environ.get("NEO4J_PASSWORD", "12345678")
graph = Graph(f"bolt://{HOST}:{PORT}", auth=(USER, PASS))


In [14]:
def loadMovies():
    """Create a Movie node for each of the first N_MOVIES rows of data/movies.csv.

    The header line is skipped. A progress line is printed every 1000 rows,
    reporting how many rows have been processed so far.
    """
    # newline='' is what the csv module recommends for files handed to csv.reader,
    # so that newlines embedded in quoted fields are parsed correctly.
    with open('data/movies.csv', encoding='utf8', newline='') as csvfile:
        readCSV = csv.reader(csvfile, delimiter=',')
        next(readCSV, None)  # skip header
        for i, row in enumerate(readCSV):
            # Check the limit BEFORE processing the row: checking afterwards
            # imported N_MOVIES + 1 rows.
            if i >= N_MOVIES:
                break

            # Reported before the row is handled, so `i` is the number of
            # rows already processed.
            if i % 1000 == 0:
                print(f"{i}/{N_MOVIES} Movie nodes created")

            createMovieNodes(row)

def createMovieNodes(row):
    """Write one movies.csv row to the graph as a Movie node.

    The row is parsed with parseRowMovie and stored with the properties
    id, title, year and genres.
    """
    # `movieId` rather than `id`, to avoid shadowing the builtin.
    movieId, title, year, genres = parseRowMovie(row)
    mov = Node("Movie", id=movieId, title=title, year=year, genres=genres)
    # Merge on (Movie, id), the same way createUserNodes merges users, so that
    # re-running the import does not create a second copy of every movie.
    graph.merge(mov, "Movie", "id")

def parseRowMovie(row):
    """Split a movies.csv row into (id, title, year, genres).

    row[1] is expected to look like "Title (YYYY)". The four-digit year in
    the trailing parentheses is returned as a string and removed from the
    title. Surrounding whitespace is ignored.

    When row[1] does not end in "(YYYY)", the whole stripped text is returned
    as the title and the year is the empty string. The previous fixed-offset
    slicing chopped such titles and returned garbage as the year.
    """
    movieId = row[0]
    rawTitle = row[1]
    genres = row[2]

    # Lazy title group, so only the LAST parenthesised 4-digit group counts as
    # the year; earlier parentheses (e.g. alternative titles) stay in the title.
    match = re.match(r"^(.*?)\s*\((\d{4})\)\s*$", rawTitle)
    if match:
        title, year = match.group(1), match.group(2)
    else:
        title, year = rawTitle.strip(), ""

    return (movieId, title, year, genres)

In [15]:
def loadRatings():
    """Import the first N_RATINGS rows of data/ratings.csv.

    For each row, the rating user's node is ensured to exist and a rating
    relationship to the movie is written. The header line is skipped and a
    progress line is printed every 1000 rows.
    """
    # encoding/newline given explicitly to match loadMovies and the csv
    # module's recommendation for files handed to csv.reader.
    with open('data/ratings.csv', encoding='utf8', newline='') as csvfile:
        readCSV = csv.reader(csvfile, delimiter=',')
        next(readCSV, None)  # skip header

        # createUserNodes merges on the user id, so calling it again for a user
        # already handled changes nothing; remembering the ids seen so far
        # avoids one database round trip per rating row.
        seenUserIds = set()

        for i, row in enumerate(readCSV):
            # Check the limit BEFORE processing the row: checking afterwards
            # imported N_RATINGS + 1 rows.
            if i >= N_RATINGS:
                break

            # Reported before the row is handled, so `i` is the number of
            # rows already processed.
            if i % 1000 == 0:
                print(f"{i}/{N_RATINGS} Rating relationships created")

            if row[0] not in seenUserIds:
                createUserNodes(row)
                seenUserIds.add(row[0])
            createRatingRelationship(row)
def createUserNodes(row):
    """Merge a User node whose id is "user" followed by the row's first field."""
    userId = "user" + row[0]
    # Merging on (User, id) matches an existing node with this id instead of
    # adding another one.
    graph.merge(Node("User", id=userId), "User", "id")

def createRatingRelationship(row):
    """Write a RATED relationship from the row's user to the row's movie.

    The row is parsed with parseRowRatingRelationships. If either the User
    or the Movie node is missing, the MATCH finds nothing and no
    relationship is written.
    """
    userId, movieId, rating, timestamp = parseRowRatingRelationships(row)

    # MERGE instead of CREATE: re-running the import updates the existing
    # relationship rather than adding a duplicate one.
    # NOTE(review): assumes at most one rating per (user, movie) pair in
    # ratings.csv -- confirm against the dataset.
    graph.run(
        'MATCH (u:User {id: $userId}), (m:Movie {id: $movieId}) '
        'MERGE (u)-[r:RATED]->(m) '
        'SET r.rating = $rating, r.timestamp = $timestamp',
        userId=userId, movieId=movieId, rating=rating, timestamp=timestamp)

def parseRowRatingRelationships(row):
    """Turn a ratings.csv row into (userId, movieId, rating, timestamp).

    The user id gets a "user" prefix, the rating is converted to float, and
    the movie id and timestamp are passed through unchanged as strings.
    """
    return (
        "user" + row[0],
        row[1],
        float(row[2]),
        row[3],
    )

In [16]:
def loadLinks():
    """Attach link ids to Movie nodes for the first N_LINKS rows of data/links.csv.

    The header line is skipped. A progress line is printed every 1000 rows,
    reporting how many rows have been processed so far.
    """
    # encoding/newline given explicitly to match loadMovies and the csv
    # module's recommendation for files handed to csv.reader.
    with open('data/links.csv', encoding='utf8', newline='') as csvfile:
        readCSV = csv.reader(csvfile, delimiter=',')
        next(readCSV, None)  # skip header
        for i, row in enumerate(readCSV):
            # Check the limit BEFORE processing the row: checking afterwards
            # handled N_LINKS + 1 rows.
            if i >= N_LINKS:
                break

            # Reported before the row is handled, so `i` is the number of
            # rows already processed.
            if i % 1000 == 0:
                print(f"{i}/{N_LINKS} Movie nodes updated with links")

            updateMovieNodeWithLinks(row)

def updateMovieNodeWithLinks(row):
    """Set imdbId and tmdbId on the Movie node whose id matches the row."""
    movieId, imdbId, tmdbId = parseRowLinks(row)

    # `+=` adds or overwrites only these two properties; the node's other
    # properties are left alone.
    query = 'MATCH (m:Movie {id: $movieId}) SET m += { imdbId: $imdbId , tmdbId: $tmdbId }'
    graph.run(query, movieId=movieId, imdbId=imdbId, tmdbId=tmdbId)

def parseRowLinks(row):
    """Return the first three fields of a links.csv row as (movieId, imdbId, tmdbId)."""
    return (row[0], row[1], row[2])

### Start importing data into Neo4j

In [17]:
# Run the three import stages in order; each banner is printed right before
# its stage starts.
import_steps = (
    ("Step 1 out of 3: loading movie nodes", loadMovies),
    ("Step 2 out of 3: loading rating relationships", loadRatings),
    ("Step 3 out of 3: updating links to movie nodes", loadLinks),
)

for banner, run_step in import_steps:
    print(banner)
    run_step()

Step 1 out of 3: loading movie nodes
0/9742 Movie nodes created
1000/9742 Movie nodes created
2000/9742 Movie nodes created
3000/9742 Movie nodes created
4000/9742 Movie nodes created
5000/9742 Movie nodes created
6000/9742 Movie nodes created
7000/9742 Movie nodes created
8000/9742 Movie nodes created
9000/9742 Movie nodes created
Step 2 out of 3: loading rating relationships
0/100836 Rating relationships created
1000/100836 Rating relationships created
2000/100836 Rating relationships created
3000/100836 Rating relationships created
4000/100836 Rating relationships created
5000/100836 Rating relationships created
6000/100836 Rating relationships created
7000/100836 Rating relationships created
8000/100836 Rating relationships created
9000/100836 Rating relationships created
10000/100836 Rating relationships created
11000/100836 Rating relationships created
12000/100836 Rating relationships created
13000/100836 Rating relationships created
14000/100836 Rating relationships created
150

### GET SOME DATA AND VISUALIZE IT

In [18]:
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget

In [19]:
# Reuse the connection settings defined for the import step (PORT, USER, PASS)
# so the two connections cannot drift apart.
uri = f"bolt://localhost:{PORT}"
user = USER
password = PASS

# The driver and session are kept open on purpose: the session is used by the
# query cell that follows.
driver = GraphDatabase.driver(uri=uri, auth=(user, password), database='neo4j')
session = driver.session(database='neo4j')

Failed to write data to connection IPv4Address(('localhost', 7687)) (IPv4Address(('127.0.0.1', 7687)))


In [20]:

# Fetch up to 100 relationships together with their start and end nodes,
# then hand the resulting graph to the yFiles widget for display.
SAMPLE_QUERY = "MATCH (m)-[r]->(t) RETURN m,r,t LIMIT 100"
result = session.run(SAMPLE_QUERY)

w = GraphWidget(graph=result.graph())
w.show()

GraphWidget(layout=Layout(height='500px', width='100%'))