In [1]:
from neo4j import GraphDatabase
import pandas as pd
import numpy as np
import string

In [2]:
import os

from neo4j import GraphDatabase

# Connection settings are read from the environment so real credentials do not
# have to be committed with the notebook. The fallbacks are the local
# development values this notebook was originally written against.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")

# Single driver shared by every later cell; each cell opens its own session on it.
driver = GraphDatabase.driver(
    uri,
    auth=(
        os.environ.get("NEO4J_USER", "jupyter"),
        os.environ.get("NEO4J_PASSWORD", "jup"),
    ),
)


In [3]:

# Smoke test for the connection: request any five nodes from the graph.
# NOTE(review): `result` is bound inside the `with` but never read here; the
# (commented-out) `result.data()` in the next cell would run after this session
# has been closed. Confirm the installed driver version allows that, or read
# the records inside the `with` block instead.
with driver.session() as session:
    result = session.run("MATCH (a) RETURN a limit 5")

In [4]:
# result.data()

In [5]:
# Load the movie export; the path is relative to the kernel's working directory.
# NOTE(review): the preview below shows a leading "Unnamed: 0" header. If that
# is a real CSV column (a saved index) rather than the index rendering, it will
# be carried into every node's properties by node_feat — confirm.
df = pd.read_csv("export_movies.csv")

In [6]:
# Preview the first rows to check the column layout before building nodes.
df.head()

Unnamed: 0,Title,Genre,Actor,Director,Musician,Photography,Producer,Writer,Release Year,Studio,Release Date,Language,Country,Color,Runtime,Region,IMDb Url,Plot,IMDb Rating,Audience Rating
0,The Adventure of Denchu-Kozo,"Comedy, Horror, Science Fiction","Kei Fujiwara, Nobu Kanaoka, N. Senba",Shin'ya Tsukamoto,,,,Shin'ya Tsukamoto,1987,,1987,Japanese,Japan,Color,47,,https://www.imdb.com/title/tt0166069/?ref_=ref...,Hikari is a boy who is bullied and teased by t...,6.6,
1,Bullet Ballet,"Crime, Drama, Science Fiction, Thriller","Shin'ya Tsukamoto, Kirina Mano, Tatsuya Nakamura",Shin'ya Tsukamoto,Chu Ishikawa,Shin'ya Tsukamoto,Igarashi Maison,Shin'ya Tsukamoto,1998,Kaijyu Theater,1998,Japanese,Japan,Color,87,,https://www.imdb.com/title/tt0163563/?ref_=ref...,"After his girlfriend commits suicide, a man (S...",7.1,
2,Day of the Animals,"Horror, Science Fiction, Thriller","Christopher George, Leslie Nielsen, Lynda Day ...",William Girdler,Lalo Schifrin,Robert Sorrentino,"Edward L. Montoro, David Sheldon","William W. Norton, Eleanor E. Norton, Edward L...",1977,,1977,English,USA,Color,97,,https://www.imdb.com/title/tt0075913/?ref_=ref...,The depletion of the earth's ozone layer cause...,5.3,US - PG
3,Days of Thunder,"Action, Drama, Sports","Tom Cruise, Robert Duvall, Nicole Kidman",Tony Scott,Hans Zimmer,Ward Russell,"Jerry Bruckheimer, Gerald R. Molen, Don Simpson","Robert Towne, Tom Cruise",1990,Paramount Pictures,1990,English,USA,Color,107,Region Free,https://www.imdb.com/title/tt0099371/?ref_=ref...,Talented but unproven stock car driver Cole Tr...,6.0,US - PG-13
4,Deep Blood,"Adventure, Horror","Frank Baroni, Cort McCown, Keith Kelsch","Raffaele Donato, Joe D'Amato",Carlo Maria Cordio,Joe D'Amato,,George Nelson Ott,1990,Filmirage,1990,Italian,Italy,Color,95,,https://www.imdb.com/title/tt0097178/?ref_=ref...,Several young men have to stop an ancient nati...,3.2,


In [7]:
# Replace every missing cell with the string 'null'. node_feat compares values
# against this exact string and writes it unquoted into the property map.
# NOTE(review): this rebinds `df`, so the raw frame is no longer available after
# this cell, and a genuine text value "null" cannot be told apart from a
# missing one.
df = df.fillna('null')

In [8]:
# One dict per DataFrame row, keyed by column name; the loading loops below
# iterate over this list.
row_dicts = df.to_dict(orient='records')

In [9]:
# CREATE (TheMatrix:Movie {title:'The Matrix', released:1999, tagline:'Welcome to the Real World'})
# CREATE (Keanu:Person {name:'Keanu Reeves', born:1964})
# CREATE (Carrie:Person {name:'Carrie-Anne Moss', born:1967})
# CREATE (Laurence:Person {name:'Laurence Fishburne', born:1961})
# CREATE (Hugo:Person {name:'Hugo Weaving', born:1960})
# CREATE (LillyW:Person {name:'Lilly Wachowski', born:1967})
# CREATE (LanaW:Person {name:'Lana Wachowski', born:1965})
# CREATE (JoelS:Person {name:'Joel Silver', born:1952})

In [10]:
def node_feat(row):
    """Render a record as the inside of a Cypher property map.

    Each ``key: value`` pair of ``row`` becomes ``key:value`` and the pairs are
    joined with ``", "`` (no surrounding braces). Spaces in keys are replaced
    with underscores.

    Value rendering:
      * the string ``'null'`` (the missing-value sentinel) is written unquoted;
      * any other string is written as a double-quoted Cypher string literal,
        with backslashes and double quotes backslash-escaped;
      * every other value is written via its ``str()`` form, unquoted.

    Args:
        row: mapping of column name (str) to value.

    Returns:
        str: the comma-separated property list; ``""`` for an empty mapping.
    """
    pairstrings = []
    for key, val in row.items():
        feat = key.replace(" ", "_")
        if isinstance(val, str) and val != 'null':
            # Escape the backslash first so the backslashes introduced for the
            # quotes are not doubled. Without this a value ending in "\" would
            # swallow the closing quote, and embedded quotes would have to be
            # dropped (losing data) to keep the literal well-formed.
            escaped = val.replace("\\", "\\\\").replace('"', '\\"')
            pairstrings.append(f'{feat}:"{escaped}"')
        else:
            # Numbers and the 'null' sentinel are emitted as bare tokens.
            pairstrings.append(f"{feat}:{val}")

    return ", ".join(pairstrings)
    

In [11]:
def create_movie( movie):
    """Build the Cypher ``CREATE`` statement for one movie record.

    The node gets the ``Movie`` label and the properties produced by
    ``node_feat(movie)``. The Cypher variable is derived from the title:
    lower-cased, ASCII punctuation removed, spaces replaced by underscores.

    Args:
        movie: mapping with at least a ``'Title'`` key.

    Returns:
        str: a ``CREATE (<var>:Movie {...})`` statement.
    """
    slug = (
        str(movie['Title'])  # tolerate a title that was parsed as a number
        .lower()
        .translate(str.maketrans('', '', string.punctuation))
        .replace(" ", "_")
    )
    # An unquoted Cypher variable may not start with a digit (e.g. a title such
    # as "12 Angry Men") and may not be empty (a title made only of punctuation).
    # Backtick-quoting makes any slug a valid name; the slug itself can never
    # contain a backtick because string.punctuation strips it above.
    variable = "`%s`" % (slug or "movie")
    return "CREATE (%s:Movie {%s})" % (variable, node_feat(movie))

In [27]:
# Create one :Movie node per record. A single session is reused for the whole
# load instead of opening and closing a new one for every row.
# Note: the statement is a plain CREATE, so running this cell again adds a
# second copy of every movie.
with driver.session() as sess:
    for row in row_dicts:
        sess.run(create_movie(row))

In [28]:
# Count every node in the graph as a sanity check on the load.
# NOTE(review): `result` is read by the next cell, after this `with` block has
# already closed the session. Confirm the installed driver version still allows
# that; otherwise call `.data()` inside the `with`.
with driver.session() as session:
    result = session.run("MATCH (a) RETURN count(a)")

In [29]:
# Display the records of the count query issued in the previous cell.
result.data()

[{'count(a)': 15}]

In [30]:
def people_list(row, col):
    """Return the names held in ``row[col]`` as a list.

    The cell is expected to be a single string with names separated by a comma
    followed by one space; it is split on exactly that separator.

    Args:
        row: mapping of column name to cell value.
        col: key of the column to split.

    Returns:
        list[str]: the individual names, in their original order.
    """
    return row[col].split(", ")


In [31]:
def merge_people_query(col, edge, num_peeps):
    """Build a parameterised Cypher query linking people to one movie.

    The query matches the movie by ``$movie``, merges one ``Person`` node per
    placeholder ``$person_0 .. $person_{num_peeps-1}``, then for each person
    merges an ``edge`` relationship pointing at the movie and adds ``col`` as
    an extra label.

    Args:
        col: label to set on every person (inserted into the query text).
        edge: relationship type (inserted into the query text).
        num_peeps: number of person placeholders to generate.

    Returns:
        str: the clauses joined by ``" \\n"``.
    """
    variables = ["person_%d" % idx for idx in range(num_peeps)]

    # All node merges come first, followed by the per-person edge/label pairs.
    clauses = ["MATCH (m:Movie {Title:$movie})"]
    clauses.extend(f"MERGE ({var}:Person {{Name:${var}}})" for var in variables)
    for var in variables:
        clauses.append(f"MERGE (m)<-[:{edge}]-({var})")
        clauses.append(f"SET {var} : {col}")

    return " \n".join(clauses)


In [32]:
# Show the query text generated for four actors.
print(merge_people_query('Actor', 'ACTED_IN', 4))

MATCH (m:Movie {Title:$movie}) 
MERGE (person_0:Person {Name:$person_0}) 
MERGE (person_1:Person {Name:$person_1}) 
MERGE (person_2:Person {Name:$person_2}) 
MERGE (person_3:Person {Name:$person_3}) 
MERGE (m)<-[:ACTED_IN]-(person_0) 
SET person_0 : Actor 
MERGE (m)<-[:ACTED_IN]-(person_1) 
SET person_1 : Actor 
MERGE (m)<-[:ACTED_IN]-(person_2) 
SET person_2 : Actor 
MERGE (m)<-[:ACTED_IN]-(person_3) 
SET person_3 : Actor


In [33]:
def people_args(row, col):
    """Map query placeholders to the names found in ``row[col]``.

    Names come from ``people_list``; the n-th name is stored under the key
    ``person_<n>``, matching the placeholders used by ``merge_people_query``.

    Args:
        row: mapping of column name to cell value.
        col: key of the column holding the names.

    Returns:
        dict[str, str]: ``{"person_0": first_name, "person_1": second_name, ...}``.
    """
    return {
        f"person_{position}": name
        for position, name in enumerate(people_list(row, col))
    }

In [34]:
def merge_people(driver, row, col, edge):
    """Merge the people listed in one column of a movie record into the graph.

    Builds the query with ``merge_people_query`` and runs it in its own session,
    passing the movie title as ``$movie`` and the names from ``people_args`` as
    ``$person_N``.

    Records whose cell is missing are skipped: missing cells were replaced by
    the string ``'null'`` upstream, and without this guard a ``Person`` named
    "null" would be created and linked to the movie.

    Args:
        driver: neo4j driver used to open the session.
        row: movie record; must contain ``'Title'`` and ``col``.
        col: column holding the names; also used as the extra person label.
        edge: relationship type from each person to the movie.

    Returns:
        None
    """
    names = people_list(row, col)
    # Nothing to merge: an empty list would also yield a bare MATCH clause,
    # which is not a complete query.
    if not names or names == ['null']:
        return

    query_template = merge_people_query(col, edge, len(names))
    peep_dict = people_args(row, col)

    # NOTE(review): the movie is matched by title only, so two movies sharing a
    # title would both receive these people — confirm titles are unique.
    with driver.session() as session:
        session.run(query_template, movie=row['Title'], **peep_dict)

In [35]:
# Columns that hold people, and the relationship type used for each one.
# The two lists are paired by position.
people_cols = ['Actor','Director', 'Musician', 'Photography', 'Producer', 'Writer']
# "MUSICIAN_IN" was previously misspelled "MUSCIAN_IN". A graph that was loaded
# with the old spelling needs those relationships renamed (or a reload) to stay
# consistent with this cell.
edges = ["ACTED_IN", "DIRECTED", "MUSICIAN_IN", "PHOTOGRAPHY_FOR", "PRODUCED", "WROTE"]
role_zip = list(zip(people_cols, edges))

# For every movie, merge the people of every role and link them to the movie.
for row in row_dicts:
    for col, edge in role_zip:
        merge_people(driver, row, col, edge)


In [None]:
# think about parallel edges

In [None]:
# Genre',"IN_GENRE",

In [36]:
# with driver.session() as session:
#     result = session.run("MATCH ()-[e]-() RETURN e")

In [38]:
# result.data()