Code and data are provided by the LinkedIn Learning course "Create a Data Project with Neo4j" (https://www.linkedin.com/learning/create-a-data-project-with-neo4j?u=76870426)

## Transaction

In [1]:
import os
import pprint

from py2neo import Graph, Node, Relationship, NodeMatcher

In [2]:
# Connect to Neo4j with data already created in 01_DataImport.ipynb.
# Credentials are read from the environment so that no secret is stored in the
# notebook. Set NEO4J_PASSWORD (and optionally NEO4J_URI / NEO4J_USER) before
# starting the kernel.
# NOTE(review): a password was previously committed in this cell; it should be
# rotated on the database side.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+s://bd9aa907.databases.neo4j.io")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
# Indexing (not .get) raises KeyError when the variable is missing, so the
# notebook fails here instead of attempting a connection without a password.
NEO4J_PASSWORD = os.environ["NEO4J_PASSWORD"]

g = Graph(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
# Matcher used by later cells to look up existing nodes by label and property
matcher = NodeMatcher(g)

In [4]:
# Start an explicit transaction so the node and the relationship below are
# committed together or rolled back together.
tx = g.begin()
try:
    # Reuse the Person node if it already exists; create it only when missing.
    # NOTE(review): Person nodes returned by the queries further down carry a
    # `deviceID` property, not `device_id` - confirm which name the imported
    # data uses before relying on this lookup.
    a = matcher.match("Person", device_id="0000000000001").first()
    if a is None:
        a = Node("Person", device_id="0000000000001", user_name="Andreas Kretz")
        tx.create(a)

        # Verify the node was created; raising sends us to the rollback below
        if not tx.exists(a):
            raise RuntimeError("Person node was not created")

    # Look up the business for the (Person)-[VISITED]->(Business) relationship
    b = matcher.match("Business", business_id="0322120-04-001").first()
    if b is None:
        # Fail with a clear message instead of handing None to Relationship
        raise LookupError("Business '0322120-04-001' not found; cannot create VISITED relationship")

    properties = {"scan_timestamp": "2022-01-01 12:55:55"}
    # Directional relationship from a to b, carrying the scan timestamp
    r = Relationship(a, "VISITED", b, **properties)
    # NOTE(review): presumably clears any bound identity so that tx.create
    # treats r as a new relationship - confirm against the py2neo docs.
    r.identity = None
    tx.create(r)

    # Verify the relationship was created; raising sends us to the rollback below
    if not tx.exists(r):
        raise RuntimeError("VISITED relationship was not created")

    # Commit the transaction only if every step above succeeded
    g.commit(tx)

except Exception as e:
    g.rollback(tx)  # Undo everything done inside this transaction
    print(f"Transaction rolled back: {e}")

## Query

Check a query's execution plan by adding `PROFILE` in front of the query.<br>
Query optimization:
1. Reduce the amount of data scanned by specifying a node label in the pattern (e.g. `MATCH (p:Person)`)
2. Use an index on the property being filtered


You can confirm that the transaction succeeded by running the following queries in the Neo4j Browser:

In [14]:
# Run these queries in the Neo4j Browser (or Bloom); nothing in this cell is executed by Python.

# Look up the Person node by user name
# MATCH (p:Person)
# WHERE p.user_name = 'Andreas Kretz'
# RETURN p;

# Look up the Business node by business id
# MATCH (b:Business)
# WHERE b.business_id = '0322120-04-001'
# RETURN b;

# Same Person lookup prefixed with PROFILE to show its execution plan
# PROFILE
# MATCH (p:Person)
# WHERE p.user_name = 'Andreas Kretz'
# RETURN p;

# Create an index on the property used in the WHERE clause above
# CREATE INDEX FOR (p:Person) ON (p.user_name);

In [None]:
# Create connection. Credentials come from the environment so that no secret is
# stored in the notebook: NEO4J_PASSWORD is required (KeyError if unset),
# NEO4J_URI and NEO4J_USER are optional overrides.
g = Graph(
    os.environ.get("NEO4J_URI", "neo4j+s://bd9aa907.databases.neo4j.io"),
    auth=(os.environ.get("NEO4J_USER", "neo4j"), os.environ["NEO4J_PASSWORD"]),
)

### Example 1
Find where Gilbert went and which zip code the business is in.

In [7]:
# Both relationship types are named, so only the businesses Gilbert visited and
# the nodes they are linked to via ISLOCATED are matched.
# The leading colon matters: `[VISITED]` would declare a variable called
# VISITED and match a relationship of ANY type, whereas `[:VISITED]` restricts
# the match to that relationship type.
# `Business` and `Zip` are variable names here (no colon), which keeps the keys
# of the returned records unchanged.
result = g.run("MATCH (p:Person)-[:VISITED]-(Business)-[:ISLOCATED]-(Zip)\
                WHERE p.user_name = 'Gilbert Harris'\
                RETURN p,Business,Zip").data()
pprint.pprint(result)
pprint.pprint(len(result))

[{'Business': {'business_address': '101 Townsend St 301',
               'business_id': '0381970-05-001',
               'business_name': 'Nwv',
               'latitude': 37.780501,
               'longitude': -122.390681},
  'Zip': Node('Zip', zip=94107),
  'p': Node('Person', deviceID='93545840801', user_name='Gilbert Harris')},
 {'Business': {'business_address': '3288 21st St 32',
               'business_id': '0320944-01-001',
               'business_name': 'Smartguy Records Of N America',
               'latitude': 37.756935,
               'longitude': -122.420957},
  'Zip': Node('Zip', zip=94110),
  'p': Node('Person', deviceID='93545840801', user_name='Gilbert Harris')},
 {'Business': {'business_address': '2358 Market St Ste 3',
               'business_id': '1041513-10-151',
               'business_name': 'Law Ofc Of Darlene B Comstedt',
               'latitude': 37.763273,
               'longitude': -122.434366},
  'Zip': Node('Zip', zip=94114),
  'p': Node('Person', dev

In [8]:
# Same lookup written with bare arrows: the direction is fixed, but no
# relationship type is named.
cypher = (
    "MATCH (p:Person)-->(Business)-->(Zip)"
    "                WHERE p.user_name = 'Gilbert Harris'"
    "                RETURN p,Business,Zip"
)
result = g.run(cypher).data()
# pprint.pprint(result)  # uncomment to inspect the full records
print(len(result))

8


In [9]:
# With neither a direction nor a relationship type in the pattern, any two-hop
# path starting at the person matches, so the row count differs from the
# directed version.
cypher = (
    "MATCH (p:Person)--(Business)--(Zip)"
    "                WHERE p.user_name = 'Gilbert Harris'"
    "                RETURN p,Business,Zip"
)
result = g.run(cypher).data()
# pprint.pprint(result)  # uncomment to inspect the full records
print(len(result))

160


### Example 3
Find everyone who visited a business with 'Bank' in its name, using a regex.

In [10]:
# Experiment with the relationship part of the pattern (--, <-- or -->) and
# explore how the results change.
cypher = (
    "MATCH (Business)-[vis:VISITED]-(Person)"
    "                WHERE Business.business_name =~ '.*Bank.*'"
    "                RETURN Business,Person"
)
result = g.run(cypher).data()
# pprint.pprint(result)  # uncomment to inspect the full records
print(len(result))

140


In [12]:
# An outgoing arrow from Business gives a different count. `Person` is only a
# variable name here (no label), so it binds to whatever node a matching
# business points to.
cypher = (
    "MATCH (Business)-->(Person)"
    "                WHERE Business.business_name =~ '.*Bank.*'"
    "                RETURN Business,Person"
)
result = g.run(cypher).data()
# pprint.pprint(result)  # uncomment to inspect the full records
print(len(result))

7


In [13]:
# Pointing the arrow at Business matches relationships that end at the
# business; the recorded count equals the undirected VISITED query above.
cypher = (
    "MATCH (Business)<--(Person)"
    "                WHERE Business.business_name =~ '.*Bank.*'"
    "                RETURN Business,Person"
)
result = g.run(cypher).data()
# pprint.pprint(result)  # uncomment to inspect the full records
print(len(result))

140


In [11]:
# Everyone who visited a bank on Jan 3rd: filter the VISITED relationship on
# its scan_timestamp prefix and the business on its name.
cypher = (
    "MATCH (p:Person)-[vis:VISITED]-(b:Business)"
    "                WHERE vis.scan_timestamp =~ '2022-01-03.*' "
    "                AND b.business_name =~ '.*Bank.*'"
    "                RETURN b, p"
)
result = g.run(cypher).data()
# pprint.pprint(result)  # uncomment to inspect the full records
print(len(result))

49
