In [1]:
from datetime import datetime

import xml.etree.cElementTree as ET
from collections import defaultdict

from geopy.geocoders import Nominatim

import requests

Load OSM dataset into memory/dictionaries

In [2]:
#file_path = "/Users/tylersaxton/thesis/planet_mteliza.osm"
file_path = "/Users/tylersaxton/thesis/planet_victoria.osm" # 10 mins and 9GB to cache

print(datetime.now())

# First pass: stream the OSM XML once and index the way <-> node topology
# together with the (upper-cased) names of the ways.
context = ET.iterparse(file_path, events=("start", "end"))

context = iter(context)

# Id of the <way> currently being parsed; 0 means "not inside a way".
# Ids are kept exactly as read from the XML attributes.
way_id  = 0
node_id = 0

nodes_per_way = defaultdict(list)  # way id -> node refs, in document order
ways_per_node = defaultdict(list)  # node ref -> ids of the ways referencing it
ways_by_name  = defaultdict(list)  # upper-cased way name -> way ids
ways_by_id    = {}                 # way id -> upper-cased name (named ways only)


for event, elem in context:
    tag = elem.tag

    # Attributes are read on the 'start' event; element text is never needed,
    # so no per-event text processing is done.
    if event == 'start':
        if tag == 'way':
            way_id = elem.get('id', 0)
        elif tag == 'nd':
            node_id = elem.get('ref', 0)
            if way_id != 0:
                nodes_per_way[way_id].append(node_id)
                ways_per_node[node_id].append(way_id)
        elif tag == 'tag':
            # Only name tags that sit inside a <way> are indexed.
            if way_id != 0 and elem.get('k', '?') == 'name':
                way_name = elem.get('v', '?')
                ways_by_name[way_name.upper()].append(way_id)
                ways_by_id[way_id] = way_name.upper()

    # Leaving the way: following <tag>/<nd> elements no longer belong to it.
    if event == 'end' and tag == 'way':
        way_id = 0

    # Drop the element's attributes and children once it has been inspected.
    elem.clear()

# Second pass to load the latitude and longitude of nodes IF AND ONLY IF
# they are involved in an intersection (i.e. referenced more than once by ways)

print(datetime.now())

context = ET.iterparse(file_path, events=("start", "end"))

context = iter(context)

way_id  = 0
node_id = 0

node_lat      = {}  # node id -> latitude attribute (string, as read from the XML)
node_lon      = {}  # node id -> longitude attribute (string, as read from the XML)

for event, elem in context:
    if event == 'start' and elem.tag == 'node':
        node_id = elem.get('id', 0)

        # Use .get() rather than ways_per_node[node_id]: indexing the
        # defaultdict would insert an empty list for every node that no way
        # references, bloating the index with one entry per node in the file.
        if len(ways_per_node.get(node_id, ())) > 1:
            node_lat[node_id] = elem.get('lat', 0)
            node_lon[node_id] = elem.get('lon', 0)

    # Drop the element's attributes and children once it has been inspected.
    elem.clear()

print(datetime.now())

2021-08-07 00:52:19.972275
2021-08-07 01:01:39.261148


In [3]:
def nominatim_search(street, city, timeout=30):
    """Query the local Nominatim search endpoint for a street within a city.

    Parameters
    ----------
    street : str
        Street name, sent as the ``street`` query parameter.
    city : str
        City name, sent as the ``city`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` would wait indefinitely on an unresponsive server.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not answer within ``timeout`` seconds.
    """
    # api-endpoint
    URL = "http://geo.local/nominatim/search"

    params = {
        'street': street,
        'city': city
    }

    # sending get request and saving the response as response object
    r = requests.get(url = URL, params = params, timeout = timeout)

    # Fail loudly on HTTP errors instead of trying to decode an error page.
    r.raise_for_status()

    # extracting data in json format
    return r.json()

In [4]:
def is_overlapping(box1, box2, margin=0.01):
    """Return True when two bounding boxes overlap, allowing some slack.

    Both boxes are 4-item sequences laid out as
    ``[lat_min, lat_max, lon_min, lon_max]``. ``box1`` is grown by ``margin``
    on every side before the comparison, so boxes that are merely close to
    each other also count as overlapping.

    The boxes overlap when their latitude ranges intersect AND their longitude
    ranges intersect. This includes the case where ``box2`` completely
    encloses ``box1`` on an axis, which an endpoint-only check would miss.

    Parameters
    ----------
    box1 : sequence of 4 numbers
        Reference box; expected to be ordered (min before max) on each axis.
    box2 : sequence of 4 numbers
        Candidate box; each axis pair may be given in either order.
    margin : float, optional
        Amount added around ``box1``, in the same units as the coordinates.

    Returns
    -------
    bool
    """
    # Add margin to box1
    lat_lo, lat_hi = box1[0] - margin, box1[1] + margin
    lon_lo, lon_hi = box1[2] - margin, box1[3] + margin

    # Normalise box2 so that either ordering of each pair is accepted.
    other_lat_lo, other_lat_hi = sorted(box2[0:2])
    other_lon_lo, other_lon_hi = sorted(box2[2:4])

    # Two closed intervals intersect iff each one starts before the other ends.
    lat_overlap = lat_lo <= other_lat_hi and other_lat_lo <= lat_hi
    lon_overlap = lon_lo <= other_lon_hi and other_lon_lo <= lon_hi

    return (lat_overlap and lon_overlap)

In [8]:
def find_intersections(street, city):
    """Find the streets that intersect ``street`` in (roughly) ``city``.

    The street is located through ``nominatim_search``; its first result
    provides the reference bounding box. Every way carrying the street's name
    in the loaded OSM index is then walked node by node, and every differently
    named way sharing a node is a candidate. A candidate is kept when
    Nominatim returns, for that name and city, at least one bounding box that
    overlaps the reference box (see ``is_overlapping``). This filters out
    same-named streets in other suburbs.

    Parameters
    ----------
    street : str
        Street name; matched case-insensitively against the loaded way names.
    city : str
        City / suburb name passed to Nominatim.

    Returns
    -------
    dict
        Maps each intersecting street name (upper-cased) to ``[lat, lon]`` of
        a shared node. When several nodes are shared, the last one visited wins.

    Raises
    ------
    IndexError
        If Nominatim returns no result for ``street`` in ``city``.
    """
    street_key = street.upper()
    city_key = city.upper()

    # Find bounding_box of original street
    original_street = nominatim_search(street_key, city_key)
    if not original_street:
        raise IndexError(
            'Nominatim returned no result for ' + street_key + ', ' + city_key
        )

    original_bounding_box_str = original_street[0]['boundingbox']
    original_bounding_box = [float(i) for i in original_bounding_box_str]

    print('Original:  ' + street.upper() + " = " + str(original_bounding_box))

    # Candidate name -> does any of its Nominatim bounding boxes overlap the
    # original street? Cached so each name costs one HTTP request, however
    # many nodes or way segments it shares with the street.
    overlap_by_name = {}

    intersection_names = {}

    # Find every matching street name (possibly in another suburb!).
    # .get() is used on the indexes so unknown keys are not inserted into them.
    for way_id in ways_by_name.get(street_key, ()):
        # Find every node associated with that street
        for node_id in nodes_per_way.get(way_id, ()):
            # Find every other street the node is associated with
            for way_id2 in ways_per_node.get(node_id, ()):
                # Ignore any ways that were clipped from the map
                if way_id2 not in ways_by_id:
                    continue

                # Find the street name for the other potential intersecting street
                intersection_name = ways_by_id[way_id2]
                if intersection_name.upper() == street_key:
                    continue

                # Check Nominatim service to see if boundary boxes roughly overlap
                if intersection_name not in overlap_by_name:
                    pot_streets = nominatim_search(intersection_name.upper(), city_key)
                    overlap_by_name[intersection_name] = any(
                        is_overlapping(
                            original_bounding_box,
                            [float(i) for i in pot_street['boundingbox']],
                        )
                        for pot_street in pot_streets
                    )

                if not overlap_by_name[intersection_name]:
                    continue

                # Coordinates are only stored for nodes present in the extract;
                # a way may reference nodes that were clipped from the map.
                if node_id not in node_lat or node_id not in node_lon:
                    continue

                intersection_names[intersection_name] = [float(node_lat[node_id]), float(node_lon[node_id])]

    return intersection_names

In [9]:
# Example: streets intersecting Millbank Drive in Mount Eliza.
find_intersections('Millbank Drive', 'Mount Eliza')

Original:  MILLBANK DRIVE = [-38.1990811, -38.1894195, 145.1037342, 145.1064294]


{'BELLBIRD ROAD': [-38.1990811, 145.1044224],
 'MATHER ROAD': [-38.1959845, 145.1040959],
 'RYLSTON COURT': [-38.1940608, 145.1046181],
 'ESME COURT': [-38.192864, 145.1043193],
 'WALKERS ROAD': [-38.1894195, 145.1037342]}

In [10]:
# Example: the same street name in a different suburb.
# NOTE(review): 'Deer' is passed to Nominatim as-is and looks like a partial
# city name - confirm which suburb is intended.
find_intersections('Millbank Drive', 'Deer')

Original:  MILLBANK DRIVE = [-37.7603155, -37.7601512, 144.7649324, 144.7650983]


{'STEVENSTON STREET': [-37.7608924, 144.7654194],
 'CROYDON AVENUE': [-37.7624865, 144.7656246],
 'WANDSWORTH AVENUE': [-37.7652655, 144.7676181],
 'WELWYN PARADE': [-37.7660273, 144.7711462],
 'BILLINGHAM ROAD': [-37.7601512, 144.7649324]}