In [1]:
import os
import time

import neo4j
import neo4j_arrow as na
import pandas as pd
import pyarrow as pa
import xgt_arrow as xa

In [2]:
# Connection settings are taken from the environment when present so real
# credentials never have to be committed with the notebook; the fallbacks
# are the values this demo has always used.
driver = neo4j.GraphDatabase.driver(
    os.environ.get("NEO4J_URI", "neo4j://localhost"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "foo"),
    ),
)
driver

<neo4j.Neo4jDriver at 0x7f68d73228d0>

## Create an index over the `timestamp` attribute

In [3]:
%%time
with driver.session() as session:
    result = session.run("""
        CREATE INDEX timestamp_index IF NOT EXISTS
            FOR ()-[r:edge]-()
            ON (r.timestamp);"""
    )
    # Auto-commit results are fetched lazily; consuming the result here makes
    # sure the statement has been fully processed and that any server-side
    # error is raised in this cell rather than being dropped at session close.
    result.consume()

CPU times: user 2.49 ms, sys: 3.03 ms, total: 5.52 ms
Wall time: 1.26 s


## Connect to the Neo4j Arrow server

In [4]:
# Credentials are read from the environment when set; the fallbacks are the
# demo values this notebook has always used.
neo4j_client = na.Neo4jArrow(
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "foo"),
)
neo4j_client

<neo4j_arrow.Neo4jArrow at 0x7f68d366a650>

## Connect to Trovares xGT Server

In [5]:
# Create the xGT graph session for user "demouser"; the bare name on the last
# line echoes the session object as the cell output.
xgt_gs = xa.GraphSession(userid="demouser")
xgt_gs

<xgt_arrow.GraphSession at 0x7f68d3675250>

## Define a function that pulls data from Neo4j and inserts it into xGT

In [6]:
def copy_data_into_xGT(neo4j_client, xgt_gs):
    """Stream all edges from neo4j into xGT and return the elapsed time.

    Submits a Cypher extract (source id, target id, edge timestamp) through
    ``neo4j_client``, waits for the job, then forwards every batch of the
    resulting stream to an edge writer obtained from ``xgt_gs`` for 'Edges'
    with direction ("Node", "Node"), source key "a" and target key "c".

    Args:
        neo4j_client: object providing ``cypher()``, ``wait_for_job()`` and
            ``stream()`` (in this notebook a ``neo4j_arrow.Neo4jArrow``).
        xgt_gs: object providing ``arrow_edge_writer()`` (in this notebook an
            ``xgt_arrow.GraphSession``).

    Returns:
        float: wall-clock seconds spent on the whole extract-and-load.

    Raises:
        RuntimeError: if ``wait_for_job`` reports that the job is not ready
            (it is called with ``timeout=60``).
    """
    t0 = time.time()
    # prepare to pull data from neo4j
    cypher_for_extract = """
MATCH (a)-[b]->(c)
RETURN a.id AS a, c.id AS c, b.timestamp as timestamp;
"""
    ticket = neo4j_client.cypher(cypher_for_extract)
    if not neo4j_client.wait_for_job(ticket, timeout=60):
        # RuntimeError is still caught by callers that catch Exception.
        raise RuntimeError('something is wrong...did you submit a job?')
    neo4j_reader = neo4j_client.stream(ticket).to_reader()
    # create graph schema in Trovares xGT
    xgt_writer = xgt_gs.arrow_edge_writer('Edges', neo4j_reader.schema,
                                          direction=("Node", "Node"),
                                          source_key="a", target_key="c")
    # move data from neo4j to xGT in chunks; the writer is closed even when a
    # read or write fails so it is never left open.
    try:
        while True:
            # Only the read signals end-of-stream; keeping the write outside
            # this try stops a StopIteration from the writer being mistaken
            # for a normal end of data.
            try:
                batch = neo4j_reader.read_next_batch()
            except StopIteration:
                break
            xgt_writer.write(batch)
    finally:
        xgt_writer.close()
    return time.time() - t0

## 2-hop path counts

In [7]:
%%time
with driver.session() as session:
    # Count 2-hop paths whose two edges carry different timestamps.
    two_hop_cypher = """
        MATCH (a)-[e0]->(b)-[e1]->(c)
        WHERE e0.timestamp <> e1.timestamp
        RETURN count(*) AS count"""
    result = session.run(two_hop_cypher)
    # The query returns one row with one column: the count.
    two_hop_count = result.single()[0]
    print(f"Neo4j> 2-hop answer Count: {two_hop_count:,}")

Neo4j> 2-hop answer Count: 64,038,582
CPU times: user 10 ms, sys: 6.3 ms, total: 16.3 ms
Wall time: 1min 5s


In [8]:
# Copy the edges from neo4j into xGT and keep the duration; the xGT query
# cells add it to their own query time when reporting totals.
xfer_time = copy_data_into_xGT(neo4j_client=neo4j_client, xgt_gs=xgt_gs)
print(f"Data transfer time: {xfer_time:,.2f}")

Data transfer time: 3.99


In [9]:
# Same 2-hop count as the Neo4j cell, run on xGT and timed by hand.
two_hop_query = """
    MATCH (a)-[e0]->(b)-[e1]->(c)
    WHERE e0.timestamp <> e1.timestamp
    RETURN count(*) AS count"""

t0 = time.time()
xgt_gs.query(two_hop_query)
result = xgt_gs.get_answers()
query_time = time.time() - t0

# First column of the first answer row holds the count.
answer_count = result[0][0]
total_time = xfer_time + query_time
print(f"xGT> 2-hop answer Count: {answer_count:,}")
print(f"   Xfer Time: {xfer_time:,.2f}")
print(f"  Query Time: {query_time:,.2f}")
print(f"  Total time: {total_time:,.2f}")

xGT> 2-hop answer Count: 64,038,582
   Xfer Time: 3.99
  Query Time: 0.88
  Total time: 4.87


## Temporal Triangles

In [10]:
%%time
with driver.session() as session:
    # Directed triangles over three distinct nodes whose edge timestamps are
    # non-decreasing and whose first-to-last span is below $threshold.
    triangle_cypher = """
        MATCH (a)-[e0]->(b)-[e1]->(c)-[e2]->(a)
        WHERE a <> b AND b <> c AND a <> c
          AND e0.timestamp <= e1.timestamp
          AND e1.timestamp <= e2.timestamp
          AND e2.timestamp - e0.timestamp < $threshold
        RETURN count(*) as count"""
    triangle_params = {"threshold": 42}
    result = session.run(triangle_cypher, triangle_params)
    # The query returns one row with one column: the count.
    triangle_count = result.single()[0]
    print(f"Neo4j> Temporal Triangles answer Count: {triangle_count:,}")

Neo4j> Temporal Triangles answer Count: 2
CPU times: user 18.1 ms, sys: 12.7 ms, total: 30.9 ms
Wall time: 1min 37s


In [11]:
# Same temporal-triangle count as the Neo4j cell, run on xGT and timed by hand.
triangle_query = """
    MATCH (a)-[e0]->(b)-[e1]->(c)-[e2]->(a)
    WHERE a <> b AND b <> c AND a <> c
      AND e0.timestamp <= e1.timestamp
      AND e1.timestamp <= e2.timestamp
      AND e2.timestamp - e0.timestamp < $threshold
    RETURN count(*) as count"""
triangle_params = {"threshold": 42}

t0 = time.time()
xgt_gs.query(triangle_query, params=triangle_params)
result = xgt_gs.get_answers()
query_time = time.time() - t0

# First column of the first answer row holds the count.
answer_count = result[0][0]
total_time = xfer_time + query_time
print(f"xGT> Temporal Triangles answer Count: {answer_count:,}")
print(f"   Xfer Time: {xfer_time:,.2f}")
print(f"  Query Time: {query_time:,.2f}")
print(f"  Total time: {total_time:,.2f}")

xGT> Temporal Triangles answer Count: 2
   Xfer Time: 3.99
  Query Time: 1.44
  Total time: 5.43
