# Python Script for Neo4j

### This Python script connects to a Neo4j graph database and creates nodes and relationships

##### Nodes
1. User
2. Business
3. Review
4. Zipcode
5. Event

##### Relationships
1. (:User) - [:`Requested Services From`] -> (:Business)
2. (:Business) - [:`Located In`] -> (:Zipcode)
3. (:Zipcode) - [:`Has`] -> (:Event)
4. (:User) - [:`Reviewed`] -> (:Business)

In [1]:
# Install required dependencies
!pip install py2neo
!pip install pandas

Collecting py2neo
  Downloading py2neo-2021.2.3-py2.py3-none-any.whl (177 kB)
     |████████████████████████████████| 177 kB 1.9 MB/s            
Collecting pansi>=2020.7.3
  Downloading pansi-2020.7.3-py2.py3-none-any.whl (10 kB)
Collecting monotonic
  Downloading monotonic-1.6-py2.py3-none-any.whl (8.2 kB)
Collecting interchange~=2021.0.4
  Downloading interchange-2021.0.4-py2.py3-none-any.whl (28 kB)
Installing collected packages: pansi, monotonic, interchange, py2neo
Successfully installed interchange-2021.0.4 monotonic-1.6 pansi-2020.7.3 py2neo-2021.2.3


In [2]:
# Import dependencies
from py2neo import Graph, Node, Relationship, NodeMatcher
from py2neo.bulk import create_nodes
import pandas as pd
import json

In [3]:
# Open the connection to the Neo4j instance used by every later cell.
NEO4J_URI = "http://neo4j:7474/db/data/"
NEO4J_AUTH = ("neo4j", "root")
graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)

In [4]:
# Start from an empty graph: remove every node together with its relationships.
wipe_query = "MATCH (a) DETACH DELETE a"
graph.run(wipe_query)

In [5]:
# Load the exported users table (comma-separated) into a DataFrame.
users_df = pd.read_csv("./data-export/pg/users", sep=",")

In [6]:
# One dictionary per user row, keyed by column name.
users_dict = users_df.to_dict(orient="records")

In [7]:
# Bulk-create one User node per user record in a single auto-commit transaction.
user_labels = {"User"}
create_nodes(graph.auto(), users_dict, labels=user_labels)

In [8]:
# Load the exported orders table (comma-separated) into a DataFrame.
orders_df = pd.read_csv("./data-export/pg/orders", sep=",")

In [9]:
# One dictionary per order row, keyed by column name.
orders_dict = orders_df.to_dict(orient="records")

In [10]:
# Distinct ids of the businesses referenced by at least one order.
business_ids = {order_row["businessId"] for order_row in orders_dict}

In [11]:
# Load yelp businesses

# JSON is UTF-8 by specification; state the encoding explicitly so the file is
# decoded the same way regardless of the platform's locale default.
with open('./data-export/mongo/yelp_businesses.json', encoding='utf-8') as json_file:
    businesses = json.load(json_file)

In [12]:
# Create Business Nodes
#
# Only businesses referenced by at least one order get a node. Each node
# carries the identifying, location, category and rating fields of the Yelp
# record.

for business in businesses:
    # business_ids is a set, so a membership test replaces scanning every id.
    if business["id"] not in business_ids:
        continue

    new_business = {
        "id": business["id"],
        "name": business["name"],
        "city": business["location"]["city"],
        "zipcode": business["location"]["zip_code"],
        "state": business["location"]["state"],
        "country": business["location"]["country"],
        # Keep only the human-readable title of each category.
        "categories": [category["title"] for category in business["categories"]],
        "rating": business["rating"],
        "review_count": business["review_count"],
    }
    graph.create(Node("Business", **new_business))

In [13]:
# Join every order with the user who placed it. `users` ends up with one
# record per (user, order) pair, in user order and then order order.

# Index the orders by user id once, so the join is a single pass over each
# table instead of a scan of every order for every user.
orders_by_user = {}
for order in orders_dict:
    orders_by_user.setdefault(order["userId"], []).append(order)

users = []
for user in users_dict:
    for order in orders_by_user.get(user["id"], []):
        users.append({
            "id": user["id"],
            "firstName": user["firstName"],
            "lastName": user["lastName"],
            "businessId": order["businessId"],
            # The key previously contained a stray trailing comma
            # ("businessName,"), which made the field unreachable by name.
            "businessName": order["businessName"],
            "serviceType": order["serviceType"],
            "address": order["address"],
        })

In [14]:
# Create Relationship [:`Requested Services From`] between User and Business
#
# One relationship is created per (user, order) record in `users`, pointing
# from the User node to the Business node the order was placed with.

# The matcher is reused by the relationship cells further down.
matcher = NodeMatcher(graph)

for user in users:
    user_id = user["id"]
    ordered_from = user["businessId"]
    # Business nodes exist only for ids in business_ids; a set membership test
    # replaces the former inner loop over every id.
    if ordered_from not in business_ids:
        continue

    for business in businesses:
        if business["id"] != ordered_from:
            continue

        # Resolve both ends once; creating relationships does not change
        # which nodes match, so the Business query need not be repeated
        # for every User node.
        user_nodes = list(matcher.match("User", id=user_id))
        business_nodes = list(matcher.match("Business", id=ordered_from))
        for left in user_nodes:
            for right in business_nodes:
                graph.create(Relationship(left, "Requested Services From", right))

In [15]:
# Distinct cities and zip codes across all loaded businesses.
# Zip codes are normalised to strings.
city_set = {entry["location"]["city"] for entry in businesses}
zipcode_set = {str(entry["location"]["zip_code"]) for entry in businesses}

In [16]:
# for city in city_set:
#     graph.create(Node("City", city=city))

In [17]:
# Create one Zipcode node per distinct zip code.

for zip_value in zipcode_set:
    zip_node = Node("Zipcode", zipcode=zip_value)
    graph.create(zip_node)

In [18]:
# Create Relationship[:`Located In`] between Business and Zipcode
#
# Every business that has a Business node is linked to the Zipcode node of
# its location.

for business in businesses:
    business_id = business["id"]
    # Business nodes exist only for ids in business_ids.
    if business_id not in business_ids:
        continue

    # Zipcode nodes are keyed by the string form of the zip code, so the
    # lookup must use the same normalisation to find them.
    zipcode = str(business["location"]["zip_code"])

    business_nodes = list(matcher.match("Business", id=business_id))
    zipcode_nodes = list(matcher.match("Zipcode", zipcode=zipcode))
    for left in business_nodes:
        for right in zipcode_nodes:
            graph.create(Relationship(left, "Located In", right))

In [19]:
# Load Yelp Events data

# JSON is UTF-8 by specification; state the encoding explicitly so the file is
# decoded the same way regardless of the platform's locale default.
with open('./data-export/mongo/yelp_events.json', encoding='utf-8') as json_file:
    events = json.load(json_file)

In [20]:
# Create one Event node per loaded event, copying its identifying, location,
# category, attendance and description fields.

for event in events:
    event_node = Node(
        "Event",
        id=event["id"],
        name=event["name"],
        city=event["location"]["city"],
        zipcode=event["location"]["zip_code"],
        state=event["location"]["state"],
        country=event["location"]["country"],
        category=event["category"],
        attending_count=event["attending_count"],
        description=event["description"],
    )
    graph.create(event_node)

In [21]:
# for city in city_set:
#     for event in events:
#         if city == event["location"]["city"]:
#             leftNode = list(matcher.match("City").where("_.city='" + city + "'"))
#             for left in leftNode:
#                 rightNode = list(matcher.match("Event").where("_.city='" + city + "'"))
#                 for right in rightNode:
#                     relation = Relationship(left, "Has", right)
#                     graph.create(relation)

In [22]:
# Create Relationship [:Has] between Zipcode and Event
#
# Each Zipcode node is linked once to every Event node that shares its zip
# code.

# Zip codes that occur on at least one event. The node lookup below already
# returns every event for a zip code, so it must run once per zip code;
# running it once per event would create the same relationships repeatedly.
event_zipcodes = {event["location"]["zip_code"] for event in events}

for zipcode in zipcode_set:
    if zipcode not in event_zipcodes:
        continue

    # Match on property equality instead of splicing the value into a Cypher
    # fragment, consistent with the other relationship cells.
    zipcode_nodes = list(matcher.match("Zipcode", zipcode=zipcode))
    event_nodes = list(matcher.match("Event", zipcode=zipcode))
    for left in zipcode_nodes:
        for right in event_nodes:
            graph.create(Relationship(left, "Has", right))

In [23]:
# Load User Reviews data

# JSON is UTF-8 by specification; state the encoding explicitly so the file is
# decoded the same way regardless of the platform's locale default.
with open('./data-export/mongo/lookup_reviews.json', encoding='utf-8') as json_file:
    reviews = json.load(json_file)

In [24]:
# Create one Review node per loaded review, storing the reviewed business id,
# the review text, its rating and the author's name.

for review in reviews:
    review_node = Node(
        "Review",
        business_id=review["business_id"],
        text=review["text"],
        rating=review["rating"],
        user=review["user"]["name"],
    )
    graph.create(review_node)

In [25]:
# Create Relationship [:Reviewed] between User and Business
#
# A review is attributed to a user when the review's author name equals
# "<firstName>, <lastName>". For every such review, each User node with that
# first and last name is linked to the Business node the review is about.

# `users` holds one record per order, so collapse it to the distinct names
# first; otherwise every additional order of a user would repeat the same
# relationships.
display_names = {}
for user in users:
    name_key = (user["firstName"], user["lastName"])
    if name_key not in display_names:
        display_names[name_key] = user["firstName"] + ", " + user["lastName"]

# Group the reviews by author name so each name is resolved with one lookup.
reviews_by_author = {}
for review in reviews:
    reviews_by_author.setdefault(review["user"]["name"], []).append(review)

for (first_name, last_name), display_name in display_names.items():
    authored_reviews = reviews_by_author.get(display_name, [])
    if not authored_reviews:
        continue

    # Match on property equality instead of splicing the names into a Cypher
    # regex, so quotes or regex metacharacters in a name cannot break or
    # alter the query.
    user_nodes = list(matcher.match("User", firstName=first_name, lastName=last_name))

    for review in authored_reviews:
        business_nodes = list(matcher.match("Business", id=review["business_id"]))
        for left in user_nodes:
            for right in business_nodes:
                graph.create(Relationship(left, "Reviewed", right))