In [1]:
import json
import math
import os
import sys
import xml.dom.minidom

from geographiclib.geodesic import Geodesic

geod = Geodesic.WGS84  # WGS84 ellipsoid model, shared by every geodesic calculation in this notebook

# Make sure local modules can be imported: os.pardir is resolved against the current
# working directory, and the resulting parent directory is added to sys.path only once
module_path_root = os.path.abspath(os.pardir)
if module_path_root not in sys.path:
    sys.path.append(module_path_root)

In [2]:
# Specify the locality whose OSM file we want to load.
# The name is turned into file names further down by replacing spaces with underscores.

#locality = 'Mount Eliza'
locality = 'Mount Eliza Sample'

In [3]:
# Module-level caches filled in by process_osm_xml()
ways_by_name  = {} # Dictionary giving, for each (upper-cased) way name, a list of way XML elements
ways_by_id    = {} # Dictionary to look up the way XML element by the way id
ways_per_node = {} # Dictionary giving, for each node ref, the list of way NAMES attached to the node

# Dictionary to look up the node XML element by the (upper-cased) node id
nodes = {}
    

In [4]:
def _link_way_to_nodes(way, way_name):
    """Record `way_name` against every node referenced by `way`.

    Updates the module-level `ways_per_node` dictionary: for each `nd` child of the
    way, `way_name` is appended to the list stored under the node's `ref`, unless the
    name is already in that list.
    """
    for node_ref in way.getElementsByTagName('nd'):
        ref = node_ref.getAttribute('ref')
        names_for_node = ways_per_node.setdefault(ref, [])
        if way_name not in names_for_node:
            names_for_node.append(way_name)


def process_osm_xml(doc, intersections_only=False):
    """Index the ways and nodes of a parsed OSM XML document into the module-level caches.

    Parameters:
        doc: a parsed XML document exposing `getElementsByTagName`
            (e.g. the result of `xml.dom.minidom.parse`).
        intersections_only: when False (default), `ways_by_name`, `ways_by_id`,
            `ways_per_node` and `nodes` are all updated and summary counts are printed.
            When True, only `ways_per_node` is updated, so that ways from a wider
            extract can contribute intersections without being registered as ways
            to walk.

    Returns:
        None. The intersection count (nodes linked to more than one way name) is
        always printed.
    """
    # Get ways and nodes from XML document
    ways_xml  = doc.getElementsByTagName('way')
    nodes_xml = doc.getElementsByTagName('node')

    for way in ways_xml:
        way_id = way.getAttribute('id')

        for tag in way.getElementsByTagName('tag'):
            k = tag.getAttribute('k')

            if k == 'name':
                way_name = tag.getAttribute('v').upper()

                if not intersections_only:
                    # Add this way to the list of ways by that name
                    ways_by_name.setdefault(way_name, []).append(way)

                    # We only add ways that have a name, implicitly excluding
                    # "natural" ways such as coastline
                    ways_by_id[way_id] = way

                # An intersection is a node associated with multiple way names
                _link_way_to_nodes(way, way_name)

            elif k == 'junction':
                # Unnamed "junctions" (e.g. roundabouts) are linked to their nodes using
                # the junction tag's value as a stand-in name, but are never added to
                # ways_by_name / ways_by_id
                _link_way_to_nodes(way, tag.getAttribute('v').upper())

    # Cache nodes in dict by id/ref
    if not intersections_only:
        for node in nodes_xml:
            node_id = node.getAttribute('id').upper()
            nodes[node_id] = node

        print('Way count:          %d' % ways_xml.length)
        print('Included ways:      %d' % len(ways_by_id))
        print('Way names:          %d' % len(ways_by_name))
        print('Node count:         %d' % nodes_xml.length)

    # Count intersections: nodes that are attached to more than one way name
    intersection_count = sum(1 for way_names in ways_per_node.values() if len(way_names) > 1)

    print('Intersection count: %d' % intersection_count)

In [5]:
# Load the main XML file into memory
# This assumes that we have reduced the OpenStreetMap data down to a small enough locality
# that the in-memory approach is feasible
# The path is relative to the current working directory (os.pardir), i.e. ../data_sources/

osm_filename_main = os.path.join(os.pardir, 'data_sources', 'Locality_' + locality.replace(' ', '_') + '.osm')
doc_main = xml.dom.minidom.parse(osm_filename_main)

# intersections_only=False: fill all caches (ways, nodes, node-to-way links) and print summary counts
process_osm_xml(doc_main, intersections_only=False)

Way count:          203
Included ways:      85
Way names:          63
Node count:         3594
Intersection count: 82


In [6]:
# Load a slightly bigger XML file into memory, to catch nodes that are JUST outside the
# boundary of the locality

osm_filename_margin = os.path.join(os.pardir, 'data_sources', 'Locality_' + locality.replace(' ', '_') + '_margin.osm')
doc_margin = xml.dom.minidom.parse(osm_filename_margin)

# intersections_only=True: only ways_per_node is updated, so ways from the margin add
# intersections to existing nodes without being registered in ways_by_id / ways_by_name / nodes
process_osm_xml(doc_margin, intersections_only=True)

Intersection count: 107


In [7]:
def bearing_from_nodes(prev_node, next_node):
    """Return the initial bearing, in degrees, of the geodesic from `prev_node` to `next_node`.

    Both arguments are node XML elements carrying `lat` and `lon` attributes.
    A negative azimuth from the geodesic solver is shifted up by 360 degrees, so the
    returned bearing is never negative.
    """
    start_lat, start_lon = (float(prev_node.getAttribute(key)) for key in ('lat', 'lon'))
    end_lat, end_lon = (float(next_node.getAttribute(key)) for key in ('lat', 'lon'))

    azimuth = geod.Inverse(start_lat, start_lon, end_lat, end_lon)['azi1']

    return azimuth + 360 if azimuth < 0 else azimuth

In [8]:
def expand_offsets(lat1, lon1, lat2, lon2, max_offset, interval, way_id, node_id):
    """Generate sample points stepping from point 1 towards point 2 along the geodesic.

    Parameters:
        lat1, lon1: start of the segment (anything `float()` accepts, e.g. XML attribute strings).
        lat2, lon2: end of the segment.
        max_offset: furthest offset to sample; only its magnitude sets the number of
            steps, while its sign is copied onto the offsets recorded in the output.
        interval: spacing between samples along the segment. A non-positive interval
            yields no samples.
        way_id, node_id: identifiers copied verbatim into every sample point.

    Returns:
        A list of `[lat, lon, bearing, offset, way_id, node_id]` lists, ordered by
        increasing distance from point 1. The point at distance zero is not included.

    Notes:
        * Positions are clamped to the end of the segment: when the segment is shorter
          than `abs(max_offset)`, the remaining samples all sit on point 2 while their
          recorded offset keeps increasing.
        * NOTE(review): the bearing is the signed azimuth from point 1 towards point 2 and
          is not shifted into a non-negative range the way bearing_from_nodes() does it.
          It is left as-is because stored batches may depend on the current values.
    """
    # Without a positive step there is nothing to sample (and interval == 0 would
    # otherwise divide by zero below)
    if interval <= 0:
        return []

    line = geod.InverseLine(float(lat1), float(lon1), float(lat2), float(lon2))

    # Azimuth at point 1 of the same geodesic; avoids solving the inverse problem twice
    bearing = line.azi1

    num_steps = int(math.ceil(abs(max_offset) / interval))
    polarity = -1 if max_offset < 0 else 1

    sample_points = []
    for step_i in range(1, num_steps + 1):
        # Never walk past the far end of the segment
        s = min(interval * step_i, line.s13)
        g = line.Position(s, Geodesic.STANDARD | Geodesic.LONG_UNROLL)

        sample_points.append([
            g['lat2'],
            g['lon2'],
            bearing,
            step_i * interval * polarity,
            way_id,
            node_id
        ])

    return sample_points

In [9]:
def walk_way_intersections_by_id(way_id, min_offset=0, max_offset=0, interval=10, debug=False):
    """Collect sample points at, before and after every intersection along one way.

    A node counts as an intersection when `ways_per_node` links it to more than one
    way name. For each such node, in way order, the output contains:

    1. if `min_offset < 0`, `interval > 0` and the node is not the first of the way:
       points stepping back towards the previous node, furthest point first;
    2. the node itself with offset 0 and a bearing taken from its neighbours
       (previous -> next; at either end of the way the node itself stands in for the
       missing neighbour);
    3. if `max_offset > 0`, `interval > 0` and the node is not the last of the way:
       points stepping forward towards the next node.

    Parameters:
        way_id: key into `ways_by_id`.
        min_offset: most negative offset to sample before each intersection.
        max_offset: largest positive offset to sample after each intersection.
        interval: spacing between offset samples; a value <= 0 disables offset sampling.
        debug: when True, print one line per node of the way.

    Returns:
        A list of `[lat, lon, bearing, offset, way_id, node_id]` lists.

    Raises:
        KeyError: if the way, or a node that has to be inspected, is missing from
            the module-level caches.
    """
    # Initialise list of points that will be returned
    sample_points = []

    # Retrieve the way and resolve its ordered node references once
    way = ways_by_id[way_id]
    refs = [node_ref.getAttribute('ref') for node_ref in way.getElementsByTagName('nd')]
    idx_last = len(refs) - 1

    # A way with fewer than two nodes has no neighbour to take a bearing from
    if idx_last < 1:
        return sample_points

    def lat_lon(node_ref):
        # Raw 'lat' / 'lon' attribute strings of a cached node
        node = nodes[node_ref]
        return node.getAttribute('lat'), node.getAttribute('lon')

    for idx, ref in enumerate(refs):
        ways_for_node = ways_per_node[ref]
        is_intersection = len(ways_for_node) > 1

        if debug:
            debug_lat, debug_lon = lat_lon(ref)
            if is_intersection:
                template = 'Debug Node Intersection: {0:s} {1:d} {2:.6f}, {3:.6f}'
            else:
                template = 'Debug Node NON-Intersection: {0:s} {1:d} {2:.6f}, {3:.6f}'
            print(template.format(ref, len(ways_for_node), float(debug_lat), float(debug_lon)))

        if not is_intersection:
            continue

        # Found an intersection!  We will output for this one
        here_lat, here_lon = lat_lon(ref)

        # Find any negative offset samples required
        if idx > 0 and min_offset < 0 and interval > 0:
            prev_lat, prev_lon = lat_lon(refs[idx - 1])
            points_before = expand_offsets(
                here_lat, here_lon, prev_lat, prev_lon,
                min_offset, interval, way_id, ref
            )
            # expand_offsets walks away from the node, so reverse to keep way order
            sample_points.extend(reversed(points_before))

        # Find the bearing at the node itself, and output the node itself
        prev_ref = refs[idx - 1] if idx > 0 else ref
        next_ref = refs[idx + 1] if idx < idx_last else ref
        bearing = bearing_from_nodes(nodes[prev_ref], nodes[next_ref])

        sample_points.append([float(here_lat), float(here_lon), bearing, 0, way_id, ref])

        # Find any positive offset samples required
        if idx < idx_last and max_offset > 0 and interval > 0:
            next_lat, next_lon = lat_lon(refs[idx + 1])
            sample_points.extend(expand_offsets(
                here_lat, here_lon, next_lat, next_lon,
                max_offset, interval, way_id, ref
            ))

    return sample_points

In [10]:
# Find intersection points (and offsets from intersections) for an example way

# 26662301 Nepean at Old Mornington Rd
# 61695915 Nepean Hwy slipway
# 172385134 Meadow Lane, Darvell Ln, Bareena Dr, Fulton Rd, Bellbird Rd
# 204757516 Arundel Ct, Blake Ct, Cummins Ln, Kirkstall Cl, Mather Rd, Sibyl Ave
# 306462563 Moorooduc Hwy
# 349358065 Nothing (near roundabout) FIXED
# 349358068 Nothing (near roundabout) FIXED
# 840490415 Mann Rd/Brighton St

# We are missing intersections at roundabouts (k="junction") => FIXED

# We are missing intersections where the other street is just outside the locality
# => Therefore we want to have a SEPARATE process to link nodes to ways, from another OSM file with a larger bounding box





# Walk one example way with debug output: every intersection node, plus samples from
# 20 before to 20 after it in steps of 10
points = walk_way_intersections_by_id('172385134', min_offset=-20, max_offset=+20, interval=10, debug=True)

print(points)

Debug Node Intersection: 1832927621 2 -38.173535, 145.108020
Debug Node NON-Intersection: 2925788066 1 -38.173857, 145.108272
Debug Node Intersection: 458415368 2 -38.175082, 145.109231
Debug Node Intersection: 458401333 2 -38.175601, 145.109629
Debug Node NON-Intersection: 6161031634 1 -38.176020, 145.109961
Debug Node NON-Intersection: 6161031624 1 -38.176601, 145.110415
Debug Node NON-Intersection: 2923267118 1 -38.177293, 145.110962
Debug Node Intersection: 458415279 2 -38.177889, 145.111428
Debug Node NON-Intersection: 5676363633 1 -38.178508, 145.111913
Debug Node NON-Intersection: 1832927605 1 -38.179294, 145.112528
Debug Node NON-Intersection: 4487020778 1 -38.179578, 145.112750
Debug Node NON-Intersection: 1832927625 1 -38.180980, 145.113847
Debug Node NON-Intersection: 458409379 1 -38.181868, 145.114543
Debug Node NON-Intersection: 6292688119 1 -38.183392, 145.115702
[[-38.1735351, 145.1080202, 148.29438720740316, 0, '172385134', '1832927621'], [-38.17361174515946, 145.108080

In [11]:
# Find intersection points (and offsets from intersections) for all ways
def sample_all_way_intersections(min_offset, max_offset, interval=10):
    """Run walk_way_intersections_by_id() over every way in `ways_by_id`.

    Parameters:
        min_offset, max_offset, interval: passed through unchanged to
            walk_way_intersections_by_id() for each way.

    Returns:
        One flat list containing the sample points of all ways, in the iteration
        order of `ways_by_id`.
    """
    all_points = []

    for way_id in ways_by_id:
        # extend() grows the list in place instead of copying it for every way
        all_points.extend(
            walk_way_intersections_by_id(way_id, min_offset=min_offset, max_offset=max_offset, interval=interval)
        )

    return all_points

In [12]:
# Sample all ways at three settings and report how many points each produces.
# The last run passes interval=0: walk_way_intersections_by_id() only samples offsets
# when interval > 0, so that run returns the intersection nodes alone.
all_points_20 = sample_all_way_intersections(-20, +20, 10)
print('+/- 20m: ' + str(len(all_points_20)))

all_points_10 = sample_all_way_intersections(-10, +10, 10)
print('+/- 10m: ' + str(len(all_points_10)))

all_points_00 = sample_all_way_intersections(0, 0, 0)
print('+/- 00m: ' + str(len(all_points_00)))

+/- 20m: 710
+/- 10m: 444
+/- 00m: 178


In [13]:
# Each sample point is [lat, lon, bearing, offset, way_id, node_id]
print(all_points_20[0])

[-38.164117203073815, 145.10191188554293, -156.63308527325063, -20, '12453751', '638345458']


In [14]:
# Import local modules
import osm_gsv_utils.gsv_loader as gsv_loader

In [15]:
# All paths below hang off module_path_root (the parent of the working directory)
download_directory = os.path.join(module_path_root, 'data_sources', 'gsv')
apikey_filename    = os.path.join(module_path_root, 'apikey.txt')
batch_filename     = os.path.join(module_path_root, 'batches', locality.replace(' ', '_') + '_20m.csv')

# Initialise interface to Google Street View
# NOTE(review): gsv_loader is bound via `import osm_gsv_utils.gsv_loader as gsv_loader` and is
# called here, so the name must resolve to a callable rather than a plain module - confirm
# against the osm_gsv_utils package
gsv = gsv_loader(apikey_filename, download_directory)

In [16]:
# Hand the +/- 20 sample points to the loader together with the batch file path
# (presumably writes them to that CSV - confirm against gsv_loader.save_batch)
gsv.save_batch(batch_filename, all_points_20)

In [17]:
# Show the first six sample points, one per line; the slice copes with shorter lists
for point in all_points_20[:6]:
    print(point)


[-38.164117203073815, 145.10191188554293, -156.63308527325063, -20, '12453751', '638345458']
[-38.16403450154621, 145.10195714282258, -156.63308527325063, -10, '12453751', '638345458']
[-38.1639518, 145.1020024, 22.246728873774174, 0, '12453751', '638345458']
[-38.163868198912255, 145.1020449231519, 21.879679036897624, 10, '12453751', '638345458']
[-38.16378459780793, 145.10208744620672, 21.879679036897624, 20, '12453751', '638345458']
[-38.16179386505896, 145.1030860307341, -158.51873773167307, -20, '12453751', '638345398']
