In [2]:
import os

from neo4j import GraphDatabase


# Module-level Neo4j driver handle; starts out unset and is assigned from init() further down in this cell.
driver = None

def init(uri, user, password):
    """Create a Neo4j driver for the given endpoint.

    Args:
        uri: Connection URI of the database (e.g. a ``bolt://`` address).
        user: User name used for authentication.
        password: Password belonging to ``user``.

    Returns:
        Whatever ``GraphDatabase.driver`` returns for these settings.
    """
    credentials = (user, password)
    return GraphDatabase.driver(uri, auth=credentials)

# Connection settings are taken from the environment so that credentials do not
# have to be edited into the notebook; the fallbacks keep the local defaults.
# NOTE(review): the fallback password is still committed here - prefer setting
# NEO4J_PASSWORD and dropping the default before sharing the notebook.
driver = init(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "kgtransport"),
)

### (LO2) Logical Representation

A set of Cypher queries was written to showcase how logical queries can be applied to the KG.

Q1: For example, the query "Find all street segments where the average traffic speed exceeds 16 km/h on 11-5-2022 at 18:20":

In first-order logic:

S(x) represents a street segment x.
AT(x, t) represents the average traffic speed on street segment x at time t.
T(t) represents the timestamp: 11-5-2022 18:20.

{ x | S(x) ∧ ∃t (T(t) ∧ AT(x, t) > 16 km/h) }

can be depicted using the following CYPHER query:

"MATCH q=(r:Road)-[:ROAD_DATE]->(d:Date{name:'11-5-2022'}), (d)-[:DATE_TIME]->(t:Time{name:'18:20'}), (t)-[:HAS_TRAFFIC_SITUATION]->(tr:TrafficSituation WHERE (tr.speed > 16)) RETURN r LIMIT 25"


In [15]:
# Q1: roads linked to date '11-5-2022' and time '18:20' where a traffic
# situation with speed > 16 is attached to that time node (first 25 rows).
# NOTE(review): `tr` is only reached through the Time node - confirm that Time
# nodes are specific to a road/date, otherwise the speed filter is not tied to `r`.
query = (
    "MATCH q=(r:Road)-[:ROAD_DATE]->(d:Date{name:'11-5-2022'}), "
    "(d)-[:DATE_TIME]->(t:Time{name:'18:20'}), "
    "(t)-[:HAS_TRAFFIC_SITUATION]->(tr:TrafficSituation WHERE (tr.speed > 16)) "
    "RETURN r LIMIT 25"
)

result = driver.execute_query(query)
# First field of the returned result tuple: the list of records.
result.records

[<Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:826' labels=frozenset({'Road'}) properties={'latitude': 48.216403322294354, 'name': 'Innerer Gürtel-41', 'length': '41', 'longitude': 16.341783329844475}>>,
 <Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:2570' labels=frozenset({'Road'}) properties={'latitude': 48.19758671986405, 'length': '478', 'name': 'Stadionbrücke - Erdberger Lände-478', 'longitude': 16.410578302545474}>>,
 <Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:2569' labels=frozenset({'Road'}) properties={'latitude': 48.17802399683387, 'name': 'Quellenstraße-295', 'length': '295', 'longitude': 16.35751663062365}>>,
 <Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:2568' labels=frozenset({'Road'}) properties={'latitude': 48.17552722008042, 'name': 'Quellenstraße-1511', 'length': '1511', 'longitude': 16.376334088929237}>>,
 <Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:2566' labe

Q2: Find all street segments where the average traffic speed is between 5 and 10 km/h

In [16]:
# Q2: roads with a traffic situation whose speed lies strictly between 5 and 10,
# for any date and time (first 25 rows).
query = (
    "MATCH q=(r:Road)-[:ROAD_DATE]->(d:Date), "
    "(d)-[:DATE_TIME]->(t:Time), "
    "(t)-[:HAS_TRAFFIC_SITUATION]->(tr:TrafficSituation WHERE (tr.speed > 5 AND tr.speed < 10)) "
    "RETURN r LIMIT 25"
)

result = driver.execute_query(query)

# First field of the returned result tuple: the list of records.
result.records

[<Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:846' labels=frozenset({'Road'}) properties={'latitude': 48.212599977850914, 'name': 'Schwedenbrücke-19', 'length': '19', 'longitude': 16.379236662760377}>>,
 <Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:2570' labels=frozenset({'Road'}) properties={'latitude': 48.19758671986405, 'length': '478', 'name': 'Stadionbrücke - Erdberger Lände-478', 'longitude': 16.410578302545474}>>,
 <Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:2569' labels=frozenset({'Road'}) properties={'latitude': 48.17802399683387, 'name': 'Quellenstraße-295', 'length': '295', 'longitude': 16.35751663062365}>>,
 <Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:2568' labels=frozenset({'Road'}) properties={'latitude': 48.17552722008042, 'name': 'Quellenstraße-1511', 'length': '1511', 'longitude': 16.376334088929237}>>,
 <Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:2567' labe

Q3: An urban planner may be interested in finding out whether the roads in the vicinity of schools are safe.

"Find all streets that are located near a school and on which the speed exceeds 16 km/h"

In first order logic:

Predicates:

Street(x): x is a street.
School(x): x is a school.
IsNearby(x, y): x is located nearby y.
HasTrafficSituation(x, z): x has a speed of z km/h.

Variables:

x, y, z: Variables representing streets, schools, and speeds, respectively.

{ x | Street(x) ∧ ∃y ∃z (School(y) ∧ IsNearby(x, y) ∧ HasTrafficSituation(x, z) ∧ z > 16) }

In CYPHER: 

To answer this query, the planner needs to:

1) Find all streets which are located nearby a school. 

For this, the following school category IDs need to be combined (as alternatives) in the Cypher query:
<ul>
    <li>Primary School (12057)</li>
    <li>Secondary School (12058)</li>
    <li>High School (12059)</li>
    <li>Middle School (12060)</li>
    <li>Private School (12061)</li>
</ul>

2) Identify the roads which are exceeding the avg. speed of 16km/h


This query can be written in CYPHER as:

MATCH (p:POI)-[:IS_LOCATED]->(r:Road), 
(r:Road)-[:ROAD_DATE]->(d:Date), 
(d)-[:DATE_TIME]->(t:Time), 
(t)-[:HAS_TRAFFIC_SITUATION]->(tr:TrafficSituation) 
WHERE tr.speed > 16
    AND p.category IN ['12057', '12058', '12059', '12060', '12061']
RETURN DISTINCT r LIMIT 100



In [36]:
# Query for 1): roads on which a POI with one of the five school category IDs
# is located (first 100 rows).
query = (
    "MATCH (p:POI)-[:IS_LOCATED]->(r:Road) "
    "WHERE p.category = '12057' "
    "OR p.category = '12058' "
    "OR p.category = '12059' "
    "OR p.category = '12060' "
    "OR p.category = '12061' "
    "RETURN r LIMIT 100"
)

result = driver.execute_query(query)

# First field of the returned result tuple: the list of records.
result.records

[<Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:1181' labels=frozenset({'Road'}) properties={'latitude': 48.2178949797526, 'name': 'Schottenring-122', 'length': '122', 'longitude': 16.37047501746565}>>,
 <Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:1203' labels=frozenset({'Road'}) properties={'latitude': 48.21447886065471, 'name': 'Salztorbrücke-572', 'length': '572', 'longitude': 16.37393297251947}>>,
 <Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:845' labels=frozenset({'Road'}) properties={'latitude': 48.21932472698845, 'name': 'Roßauer Brücke-936', 'length': '936', 'longitude': 16.368342810108615}>>,
 <Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:850' labels=frozenset({'Road'}) properties={'latitude': 48.218269986100495, 'name': 'Roßauer Lände-8', 'length': '8', 'longitude': 16.369805010035634}>>,
 <Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:1342' labels=frozenset({'Road'}) pro

In [46]:
# Query for 2): distinct roads that have a school POI located on them AND a
# traffic situation with speed > 16 (first 100 rows).
#
# The category test is a single IN predicate on purpose: in Cypher, AND binds
# tighter than OR, so `speed > 16 AND cat = a OR cat = b ...` would apply the
# speed filter to the first category only and let every other school category
# through regardless of speed.
# NOTE(review): `tr` is only reached through the Date/Time nodes - confirm these
# are specific to road `r`, otherwise the speed filter is not tied to the road.

query = """
MATCH (p:POI)-[:IS_LOCATED]->(r:Road), 
(r:Road)-[:ROAD_DATE]->(d:Date), 
(d)-[:DATE_TIME]->(t:Time), 
(t)-[:HAS_TRAFFIC_SITUATION]->(tr:TrafficSituation) 
WHERE tr.speed > 16
    AND p.category IN ['12057', '12058', '12059', '12060', '12061']
RETURN DISTINCT r LIMIT 100"""

result = driver.execute_query(
    query
)

result[0]

[<Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:1181' labels=frozenset({'Road'}) properties={'latitude': 48.2178949797526, 'name': 'Schottenring-122', 'length': '122', 'longitude': 16.37047501746565}>>,
 <Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:1203' labels=frozenset({'Road'}) properties={'latitude': 48.21447886065471, 'name': 'Salztorbrücke-572', 'length': '572', 'longitude': 16.37393297251947}>>,
 <Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:845' labels=frozenset({'Road'}) properties={'latitude': 48.21932472698845, 'name': 'Roßauer Brücke-936', 'length': '936', 'longitude': 16.368342810108615}>>,
 <Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:850' labels=frozenset({'Road'}) properties={'latitude': 48.218269986100495, 'name': 'Roßauer Lände-8', 'length': '8', 'longitude': 16.369805010035634}>>,
 <Record r=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:1342' labels=frozenset({'Road'}) pro

## LO 9 / LO 11 

Scenario 1:

A traffic planner may be interested in all roads affected by an incident. Thus the traffic planner selects an incident on the map and tries to predict roads affected by it by using the relationship 'IS_NEARBY':

In the following an implementation of the code is shown. Subsequently a UI is introduced where the user is able to select different models without having to interact with the code.

In [86]:
#from pykeen.models import predict
import torch
from pykeen.triples import TriplesFactory
from pykeen import predict
from pathlib import Path
from pykeen.utils import set_random_seed
import os
import pandas as pd

# Project root: the parent of the current working directory.
working_dir = Path.cwd().parent
print(working_dir)

# Seed PyKEEN's random number generators with a fixed value.
set_random_seed(0)

# File-system locations below the project root: the serialized triples factory,
# the pickled RotatE model, and the entity-to-id mapping file.
TRIPLES_PATH = os.path.join(working_dir, '4_kge/triples')
PATH = os.path.join(working_dir, '4_kge/results/RotatE/trained_model.pkl')
ENTITY_TO_ID_FILE_PATH = os.path.join(working_dir, '4_kge/triples/entity_to_id.tsv')

# Triples factory restored from its binary on-disk form; used by predict_tail.
tf = TriplesFactory.from_path_binary(TRIPLES_PATH)

# Models already deserialized in this kernel session, keyed by file path.
_MODEL_CACHE = {}


def _load_model(path):
    """Return the model stored at ``path``, loading it from disk only once."""
    if path not in _MODEL_CACHE:
        # NOTE(review): torch.load unpickles arbitrary objects - only point
        # PATH at model files from a trusted source.
        _MODEL_CACHE[path] = torch.load(path)
    return _MODEL_CACHE[path]


def predict_tail(head_node_id, relationship, count=5):
    """Predict the most likely tail entities for a (head, relation) pair.

    Args:
        head_node_id: ID of the head entity; anything ``int()`` accepts.
            The value is passed to PyKEEN as-is.
            NOTE(review): this presumes the PyKEEN entity ID equals the Neo4j
            node ID - confirm against the entity-to-id mapping.
        relationship: Relation label, e.g. ``'IS_NEARBY'``.
        count: Number of top-ranked predictions to return (default 5).

    Returns:
        A list with the ``tail_label`` values of the first ``count`` rows of
        the prediction data frame.

    Raises:
        ValueError: If ``head_node_id`` cannot be converted to ``int``.
    """
    head_id = int(head_node_id)
    # The model at PATH is fetched through the cache so that repeated
    # predictions do not deserialize the file again on every call.
    predictions = predict.predict_target(
        _load_model(PATH),
        head=head_id,
        relation=relationship,
        triples_factory=tf,
    )
    return predictions.df['tail_label'].head(count).to_list()

# Example: the five top-ranked 'IS_NEARBY' tail predictions for head ID 751.

predict_tail(751,'IS_NEARBY')


/Users/yikaiyang/Projects/SS22-Knowledge-Graph


[953, 1188, 889, 1205, 1246]

In [70]:
# Fetch the nodes with the IDs [953, 1188, 889, 1205, 1246] from the Neo4j database.
query = "\nMATCH (n) WHERE ID(n) IN [953, 1188, 889, 1205, 1246] RETURN n"

result = driver.execute_query(query)

# First field of the returned result tuple: the list of records.
result.records

[<Record n=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:953' labels=frozenset({'Road'}) properties={'latitude': 48.17999243229613, 'name': 'Gaudenzdorfer Gürtel-706', 'length': '706', 'longitude': 16.349012335247938}>>,
 <Record n=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:1188' labels=frozenset({'Road'}) properties={'latitude': 48.18176523403963, 'length': '319', 'name': 'Siebenbrunnengasse - Margaretengürtel-319', 'longitude': 16.34730918245623}>>,
 <Record n=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:889' labels=frozenset({'Road'}) properties={'latitude': 48.2124322405085, 'name': 'Marienbrücke-270', 'length': '270', 'longitude': 16.37621635759327}>>,
 <Record n=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:1205' labels=frozenset({'Road'}) properties={'latitude': 48.181312675508586, 'name': 'Äußerer Gürtel-598', 'length': '598', 'longitude': 16.34725094868386}>>,
 <Record n=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:12

The query yields the results: 'Gaudenzdorfer Gürtel-706', 'Siebenbrunnengasse - Margaretengürtel-319', 'Marienbrücke-270', 'Äußerer Gürtel-598', 'Aspernbrückengasse-92' using the model RotatE.

## LO 9 / LO 11 
Scenario 2:

In this scenario, an approach similar to the one presented in the paper by Tan et al. is applied to demonstrate how KGEs can forecast traffic outcomes for a specific time:

Since the KG contains no entities that split the time component into the two categories (Before_7:15) and (After_7:15), the time entities (7:20) and (9:00) were chosen as replacements.


In [73]:
# Step 1: look up the node ID of the Time entity named '7:20'.
query = "MATCH (n:Time {name: '7:20'}) RETURN ID(n)"

result = driver.execute_query(query)

# First field of the returned result tuple: the list of records.
result.records

[<Record ID(n)=802>]

In [75]:
# Step 2: look up the node ID of the Time entity for 9:00.
# The name queried is '9:0' - presumably minutes are stored without zero
# padding in the KG; verify against the import script.
query = "MATCH (n:Time {name: '9:0'}) RETURN ID(n)"

result = driver.execute_query(query)
# First field of the returned result tuple: the list of records.
result.records

[<Record ID(n)=807>]

In [90]:
# Predicted 'HAS_TRAFFIC_SITUATION' tails for the time entities
# '7:20' (ID 802) and '9:00' (ID 807), printed in that order.
for time_node_id in (802, 807):
    print(predict_tail(time_node_id, 'HAS_TRAFFIC_SITUATION'))

[2564, 1315, 1016, 1091, 1163]
[1233, 1016, 1080, 1025, 857]


In [81]:
# Fetch the nodes predicted for time entity '7:20'.
query = "\nMATCH (n) WHERE ID(n) IN [2564, 1315, 1016, 1091, 1163] RETURN n"

result = driver.execute_query(query)

# First field of the returned result tuple: the list of records.
result.records

[<Record n=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:2564' labels=frozenset({'Road'}) properties={'latitude': 48.20577998645604, 'name': 'Rotundenbrücke-29', 'length': '29', 'longitude': 16.396400034427643}>>,
 <Record n=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:1315' labels=frozenset({'Road'}) properties={'latitude': 48.20579990606989, 'name': 'Rotundenbrücke-1493', 'length': '1493', 'longitude': 16.39806129962679}>>,
 <Record n=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:1016' labels=frozenset({'Road'}) properties={'latitude': 48.1809655437246, 'name': 'Gürtel-38', 'length': '38', 'longitude': 16.359032783657312}>>,
 <Record n=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:1091' labels=frozenset({'Road'}) properties={'latitude': 48.23382147814147, 'name': 'Gürtelbrücke-363', 'length': '363', 'longitude': 16.356311397585085}>>,
 <Record n=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:1163' labels=frozenset({'Road'}) prope

In [89]:
# Fetch the nodes with the IDs [1233, 1016, 1080, 1025, 857] from the database.
query = "\nMATCH (n) WHERE ID(n) IN [1233, 1016, 1080, 1025, 857] RETURN n"

result = driver.execute_query(query)

# First field of the returned result tuple: the list of records.
result.records

[<Record n=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:1233' labels=frozenset({'Road'}) properties={'latitude': 48.18252444267273, 'name': 'Landgutgasse-274', 'length': '274', 'longitude': 16.363552240654826}>>,
 <Record n=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:1016' labels=frozenset({'Road'}) properties={'latitude': 48.1809655437246, 'name': 'Gürtel-38', 'length': '38', 'longitude': 16.359032783657312}>>,
 <Record n=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:1080' labels=frozenset({'Road'}) properties={'latitude': 48.180151964115794, 'name': 'Matzleinsdorfer Platz-74', 'length': '74', 'longitude': 16.35941115411697}>>,
 <Record n=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:1025' labels=frozenset({'Road'}) properties={'latitude': 48.22606667255362, 'name': 'Äußerer Gürtel-56', 'length': '56', 'longitude': 16.34950779688855}>>,
 <Record n=<Node element_id='4:01c73c9a-aa6d-420a-96a7-3c39c7f51efc:857' labels=frozenset({'Road'}) pro