In [11]:
!pip install py2neo
!pip install pandas

Collecting pandas
  Downloading pandas-1.3.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.5 MB)
     |████████████████████████████████| 11.5 MB 5.4 MB/s            
Collecting numpy>=1.17.3
  Downloading numpy-1.21.4-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.7 MB)
     |████████████████████████████████| 15.7 MB 3.6 MB/s             
Installing collected packages: numpy, pandas
Successfully installed numpy-1.21.4 pandas-1.3.4


In [100]:
import os

from py2neo import Graph, Node, Relationship, NodeMatcher
from py2neo.bulk import create_nodes
# Connect to Neo4j. The URI and credentials are read from the environment so
# they do not have to be committed with the notebook; the fallbacks are the
# values this cell originally hardcoded, so existing setups keep working.
g = Graph(
    os.environ.get("NEO4J_URI", "http://neo4j:7474/db/data/"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "root"),
    ),
)

In [101]:
import pandas as pd

In [107]:
# Read the comma-separated users export into a DataFrame.
# NOTE(review): presumably a Postgres dump, judging by the "pg" directory — confirm.
users_df = pd.read_csv("./data-export/pg/users", sep=",")

In [108]:
# Convert the frame to a list with one {column: value} dict per row.
# Despite its name, users_dict is a list of dicts, not a dict.
users_dict = users_df.to_dict(orient="records")

In [109]:
# Bare expression: renders the user records as this cell's output for inspection.
users_dict

[{'id': 1, 'firstName': 'rahul', 'lastName': 'n'},
 {'id': 2, 'firstName': 'Rahul', 'lastName': 'Mud'},
 {'id': 3, 'firstName': 'Vi', 'lastName': 'Ka'},
 {'id': 4, 'firstName': 'Aman', 'lastName': 'K'},
 {'id': 5, 'firstName': 'ayush', 'lastName': 'k'},
 {'id': 6, 'firstName': 'Sanjay', 'lastName': 'R'}]

In [110]:
# Bulk-create one node labelled "User" per record in users_dict.
# NOTE(review): this creates rather than merges, so re-running the cell
# presumably inserts duplicate User nodes — confirm before re-executing.
create_nodes(g.auto(), users_dict, labels={"User"})

In [111]:
# Read back the id of every User node; each row is a dict keyed by "n.id".
user_id_query = "MATCH (n:User) RETURN n.id"
users = g.run(user_id_query).data()

[{'n.id': 1}, {'n.id': 2}, {'n.id': 3}, {'n.id': 4}, {'n.id': 5}, {'n.id': 6}]

In [112]:
# Read the comma-separated orders export into a DataFrame.
# NOTE(review): presumably a Postgres dump, judging by the "pg" directory — confirm.
orders_df = pd.read_csv("./data-export/pg/orders", sep=",")

In [113]:
# Convert the frame to a list with one {column: value} dict per row.
# Despite its name, orders_dict is a list of dicts, not a dict.
orders_dict = orders_df.to_dict(orient="records")

In [114]:
# Bulk-create one node labelled "Order" per record in orders_dict.
# NOTE(review): this creates rather than merges, so re-running the cell
# presumably inserts duplicate Order nodes — confirm before re-executing.
create_nodes(g.auto(), orders_dict, labels={"Order"})

In [115]:
# Read back each Order node's id and owning user id; rows are dicts keyed by
# "o.id" and "o.userId".
order_owner_query = "MATCH (o:Order) RETURN o.id, o.userId"
orders = g.run(order_owner_query).data()

[{'o.id': 5, 'o.userId': 4},
 {'o.id': 6, 'o.userId': 4},
 {'o.id': 7, 'o.userId': 5},
 {'o.id': 8, 'o.userId': 5},
 {'o.id': 9, 'o.userId': 5},
 {'o.id': 10, 'o.userId': 5},
 {'o.id': 11, 'o.userId': 6},
 {'o.id': 12, 'o.userId': 6},
 {'o.id': 1, 'o.userId': 1},
 {'o.id': 2, 'o.userId': 1},
 {'o.id': 3, 'o.userId': 2},
 {'o.id': 4, 'o.userId': 4}]

In [129]:
# Link every user to each of their orders with a "Requested Services From"
# relationship (User -> Order).
#
# `users` rows are read via the "n.id" key and `orders` rows via the "o.id" and
# "o.userId" keys, i.e. the flat results of the two RETURN queries above.
# Ids are passed as Cypher parameters instead of being spliced into the query text.
# NOTE(review): relationships are created, not merged, so re-running this cell
# presumably duplicates them — confirm before re-executing.
USER_BY_ID = "MATCH (n:User {id: $id}) RETURN n LIMIT 1"
ORDER_BY_ID = "MATCH (n:Order {id: $id}) RETURN n LIMIT 1"

for user in users:
    user_id = user["n.id"]
    user_orders = [order for order in orders if order["o.userId"] == user_id]
    if not user_orders:
        continue  # no orders for this user, so no lookup is needed

    # Look the user node up once instead of once per matching order.
    user_node = g.evaluate(USER_BY_ID, {"id": user_id})
    if user_node is None:
        continue  # no such node in the graph; nothing to attach to

    for order in user_orders:
        order_node = g.evaluate(ORDER_BY_ID, {"id": order["o.id"]})
        if order_node is None:
            continue
        g.create(Relationship(user_node, "Requested Services From", order_node))

In [130]:
# Rebind `orders` to the full Order nodes: each row is now a dict keyed by "o"
# (previously the rows held only "o.id" / "o.userId").
order_node_query = "MATCH (o:Order) RETURN o"
orders = g.run(order_node_query).data()

In [140]:
# Empty accumulators for the distinct zip codes and cities, plus an empty
# placeholder for the business records.
zipcode_set, city_set = set(), set()
businesses = dict()

In [157]:
import json

# Load the business records from the JSON export.
# The encoding is given explicitly because open() otherwise falls back to the
# platform's locale encoding, which can mis-decode non-ASCII text in the file.
with open('./data-export/mongo/yelp_businesses.json', encoding="utf-8") as json_file:
    businesses = json.load(json_file)

In [162]:
# Collect the distinct, non-empty cities and zip codes found in the business records.
for business in businesses:
    # A record with a missing or empty "location" contributes nothing, and a
    # location without "city" / "zip_code" is skipped instead of raising KeyError.
    location = business.get("location") or {}
    city = location.get("city")
    if city:
        city_set.add(city)
    zip_code = location.get("zip_code")
    if zip_code:
        zipcode_set.add(zip_code)


In [163]:
# Create one node labelled "Zipcode" per distinct zip code collected so far.
# NOTE(review): ZIPCODE is rebound to whatever g.create returns on each pass;
# presumably that is None — confirm before relying on it.
for zipcode in zipcode_set:
    zipcode_node = Node("Zipcode", zipcode=zipcode)
    ZIPCODE = g.create(zipcode_node)

In [164]:
# Create one node labelled "City" per distinct city collected so far.
# NOTE(review): CITY is rebound to whatever g.create returns on each pass;
# presumably that is None — confirm before relying on it.
for city in city_set:
    city_node = Node("City", city=city)
    CITY = g.create(city_node)

In [200]:
# Link each order to the City node(s) named in its address with a "Located In"
# relationship (Order -> City).
#
# Each row of `orders` is read as row["o"], with "id" and "address" properties.
# The address must split on ", " into exactly three parts, the second being the
# city; any other shape raises ValueError on unpacking.
# Values are passed as Cypher parameters rather than spliced into the query
# text, so city names containing quotes no longer break the query.
# NOTE(review): relationships are created, not merged, so re-running this cell
# presumably duplicates them — confirm before re-executing.
ORDER_BY_ID = "MATCH (n:Order {id: $id}) RETURN n LIMIT 1"
CITY_BY_NAME = "MATCH (n:City) WHERE n.city = $city RETURN n"

for record in orders:
    order = record["o"]
    _, city, _ = order["address"].split(", ")

    order_node = g.evaluate(ORDER_BY_ID, {"id": order["id"]})
    if order_node is None:
        continue  # order not present in the graph; nothing to link

    # Materialise the matches before writing so no cursor is open during g.create.
    city_nodes = [row[0] for row in g.run(CITY_BY_NAME, {"city": city})]
    for city_node in city_nodes:
        g.create(Relationship(order_node, "Located In", city_node))

In [202]:
# Link City nodes to Zipcode nodes with a "Has" relationship (City -> Zipcode),
# based on the city / zip code pairs found in the order addresses.
#
# Each row of `orders` is read as row["o"] with an "address" property. The
# address must split on ", " into three parts (the second is the city), and the
# third part must split on " - " into two parts (the second is the zip code);
# any other shape raises ValueError on unpacking.
# Pairs are de-duplicated first, so several orders sharing a city and zip code
# yield one relationship per node pair instead of one per order.
# Values are passed as Cypher parameters rather than spliced into the query text.
# NOTE(review): relationships are created, not merged, so re-running this cell
# presumably duplicates them — confirm before re-executing.
CITY_BY_NAME = "MATCH (n:City) WHERE n.city = $city RETURN n"
ZIPCODE_BY_VALUE = "MATCH (n:Zipcode) WHERE n.zipcode = $zipcode RETURN n"

# A dict is used as an ordered set: keys keep first-seen order.
city_zip_pairs = {}
for record in orders:
    _, city, state = record["o"]["address"].split(", ")
    _, zipcode = state.split(" - ")
    city_zip_pairs[(city, zipcode)] = None

for city, zipcode in city_zip_pairs:
    # Materialise both result sets before writing so no cursor is open during g.create.
    city_nodes = [row[0] for row in g.run(CITY_BY_NAME, {"city": city})]
    zipcode_nodes = [row[0] for row in g.run(ZIPCODE_BY_VALUE, {"zipcode": zipcode})]
    for city_node in city_nodes:
        for zipcode_node in zipcode_nodes:
            g.create(Relationship(city_node, "Has", zipcode_node))