In [2]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Connection settings.
# The URI is currently hard-coded to a local Bolt endpoint. The environment-based
# alternatives are left commented out, so nothing loaded by load_dotenv() is
# actually read by the settings in this cell.
#NEO4J_URI = os.environ.get("NEO4J_URI")
NEO4J_URI = "bolt://localhost:7687"
# Credentials are disabled as well; no username/password is defined in this cell.
#NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME")
#NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")

In [3]:
from graphdatascience import GraphDataScience

# Create the Graph Data Science client used by every later cell.
# Only the URI is passed (no auth argument), so this presumably relies on the
# local server accepting unauthenticated connections — TODO confirm.
gds = GraphDataScience(NEO4J_URI)

## Populating Neo4j

In [3]:
import csv
# Read every station row into memory as a list of dicts keyed by the CSV header.
# - encoding="utf-8-sig" drops a leading byte-order mark, if present, so the first
#   column name is not polluted by it.
# - newline="" is what the csv module requires of the file object so that quoted
#   fields containing line breaks are parsed correctly.
with open("data/nr-stations-all.csv", "r", encoding="utf-8-sig", newline="") as stations_file:
    stations = list(csv.DictReader(stations_file))

print(f"Loaded {len(stations)} stations")

Loaded 2593 stations


In [4]:
# Load stations as nodes.
# Start from an empty database so re-running this cell does not duplicate stations.
gds.run_cypher("MATCH (n) DETACH DELETE n")

# Send the values as a query parameter instead of splicing them into the Cypher
# text: a station name containing a double quote or backslash would otherwise
# break (or alter) the statement. UNWIND creates all nodes in one round trip
# rather than one query per station.
gds.run_cypher(
    """
    UNWIND $stations AS station
    CREATE (:Station {name: station.name, crs: station.crs})
    """,
    params={
        "stations": [
            {"name": station["name"], "crs": station["crs"]}
            for station in stations
        ]
    },
)

In [5]:
import csv
# Read every station-to-station link row into memory as a list of dicts keyed by
# the CSV header.
# - encoding="utf-8-sig" drops a leading byte-order mark, if present.
# - newline="" is what the csv module requires of the file object so that quoted
#   fields containing line breaks are parsed correctly.
with open("data/nr-station-links.csv", "r", encoding="utf-8-sig", newline="") as links_file:
    links = list(csv.DictReader(links_file))

print(f"Loaded {len(links)} links")

Loaded 5802 links


In [6]:
# Create a directed TRACK relationship for every link whose two endpoints exist
# as Station nodes (matched by CRS code). Links referring to an unknown CRS code
# match nothing and are skipped.
#
# Values travel as a query parameter rather than being interpolated into the
# Cypher text, so unusual characters in the CSV cannot break or alter the
# statement, and UNWIND handles all links in a single round trip.
# MERGE (not CREATE) keeps the load idempotent: an identical
# (from, to, distance) relationship is only created once.
gds.run_cypher(
    """
    UNWIND $links AS link
    MATCH (st_from:Station), (st_to:Station)
    WHERE st_from.crs = link.from_crs AND st_to.crs = link.to_crs
    MERGE (st_from)-[:TRACK {distance: link.distance}]->(st_to)
    """,
    params={
        "links": [
            {
                "from_crs": link["from"],
                "to_crs": link["to"],
                # Distances are stored rounded to two decimal places.
                "distance": round(float(link["distance"]), 2),
            }
            for link in links
        ]
    },
)

## Creating a graph

In [10]:
# Drop a previous "trains" projection so the cell can be re-run, but only if one
# exists: gds.graph.get() raises when the named graph is missing, which would
# make this cell fail on a fresh database / first run.
if gds.graph.exists("trains")["exists"]:
    gds.graph.drop(gds.graph.get("trains"))

# Project all Station nodes and TRACK relationships into an in-memory graph named
# "trains", carrying each relationship's `distance` so it can be used as a weight.
# The call is left as the cell's last expression so its result is displayed.
gds.graph.project.cypher(
    graph_name="trains",
    node_spec="MATCH (s:Station) RETURN id(s) AS id",
    relationship_spec=
        """
        MATCH (s1:Station)-[t:TRACK]->(s2:Station)
        RETURN id(s1) AS source, id(s2) AS target, t.distance AS distance
        """
)

(<graphdatascience.graph.graph_object.Graph at 0x7fba6aad6cc0>,
 nodeQuery                         MATCH (s:Station) RETURN id(s) AS id
 relationshipQuery    MATCH (s1:Station)-[t:TRACK]->(s2:Station)\n  ...
 graphName                                                       trains
 nodeCount                                                         2593
 relationshipCount                                                 5782
 projectMillis                                                       72
 Name: 0, dtype: object)

## Calculating the shortest path

In [11]:
# Look up the node ids of the two endpoint stations by their `name` property.
bham = gds.find_node_id(["Station"], {"name": "Birmingham New Street"})
eboro = gds.find_node_id(["Station"], {"name": "Edinburgh"})

# Run Dijkstra on the projected "trains" graph, weighting each hop by `distance`.
trains_graph = gds.graph.get("trains")
shortest_path = gds.shortestPath.dijkstra.stream(
    trains_graph,
    sourceNode=bham,
    targetNode=eboro,
    relationshipWeightProperty="distance",
)

# The first result row's `costs` entry is indexed from the end: its last element
# is reported as the total distance of the route.
costs_along_path = shortest_path.get("costs").get(0)
print(f"Shortest distance: {costs_along_path[-1]}")

Shortest distance: 295.91


## Calculating centrality

In [11]:
# Compute betweenness centrality for every node of the projected "trains" graph.
graph = gds.graph.get("trains")
result = gds.betweenness.stream(graph)

# Node id of the top-scoring row. Extract a scalar with .iloc[0] before int():
# calling int() on a one-element Series is deprecated in recent pandas, and a
# plain Python int is also what the query parameter below needs.
top_node_id = int(
    result.sort_values(by="score", ascending=False)["nodeId"].iloc[0]
)

# Resolve the node id back to a station name. The id is passed as a query
# parameter rather than being formatted into the Cypher text.
top_station = gds.run_cypher(
    """
    MATCH (s:Station)
    WHERE id(s) = $node_id
    RETURN s.name
    """,
    params={"node_id": top_node_id},
)
print(f"Station with the highest centrality: {top_station['s.name'][0]}")

Station with the highest centrality: Tamworth


## Closing connection

In [12]:
# Close the GraphDataScience client now that the analysis is finished.
gds.close()