# Find Distinct Intersections

Find the distinct combinations of [local_street] x [town] in the reverse-geocoded PBN data.

Then, find the distinct intersections for those streets

In [1]:
import pandas as pd
import geopandas as gpd
import geopy
import geopy.distance

from shapely.geometry.linestring import LineString
from shapely.geometry.multilinestring import MultiLineString

from geographiclib.geodesic import Geodesic

from datetime import datetime
from tqdm.notebook import tqdm

import xml.etree.cElementTree as ET
from collections import defaultdict

from geopy.geocoders import Nominatim

import requests

import gmaps

timestamp_starting = 0

def log_starting(msg):
    global timestamp_starting
    timestamp_starting = datetime.now()
    print(str(timestamp_starting) + ' START - ' + msg, flush=True)

def log_finished(msg):
    global timestamp_starting
    timestamp_finished = datetime.now()
    timestamp_duration = timestamp_finished - timestamp_starting
    print(str(timestamp_finished) + ' END   - ' + msg
        + '(' + str(timestamp_duration.total_seconds()) + ')',
        flush=True
    )

In [2]:
log_starting('Load original PBN dataset')

pbn = gpd.read_file('Principal_Bicycle_Network_(PBN).geojson')

log_finished('Load original PBN dataset')

2021-08-10 20:17:41.530787 START - Load original PBN dataset
2021-08-10 20:17:45.505768 END   - Load original PBN dataset(3.974981)


In [3]:
log_starting('Read geocoded PBN dataset')

pbn_explode = gpd.read_file('pbn_exploded.geojson')

log_finished('Read geocoded PBN dataset')

2021-08-10 20:17:45.509012 START - Read geocoded PBN dataset
2021-08-10 20:18:38.486481 END   - Read geocoded PBN dataset(52.977469)


Load raw OpenStreetMap data for the area into memory.  This will be used to find all the intersections for each [local_street] x [town] mentioned in the PBN data.

The extract was downloaded from http://extract.bbbike.org.  Region limited to one small down for initial testing, then a box that encompasses all of Victoria.

The "Victoria" data took around 10 minutes to load into memory, and the resulting Python process used approximately 9GB of memory.

In [4]:
file_path = "/Users/tylersaxton/thesis/planet_victoria.osm"

log_starting('Read raw OpenStreetMap data')

# In-Memory caching via dictionary objects
nodes_per_way = defaultdict(list) # List of nodes in each way
ways_per_node = defaultdict(list) # List of ways associated with each node
ways_by_name  = defaultdict(list) # List of ways associated with each street name
ways_by_id    = {}                # List of ways by osm_id
node_lat      = {}                # Latitude of an intersection node by oms_id
node_lon      = {}                # Longitude of an intersection node by oms_id

# Read the OpenStreetMap XML file
context = ET.iterparse(file_path, events=("start", "end"))
context = iter(context)

way_id  = 0  # Keep track of which "way" object we are reading from XML, 0=none
node_id = 0  # Keep track of which "node" (nd) object we are reading from XML, 0=none

# Iterate through every XML element in the file as it starts or finishes
# This approach allows us to "stream" the XML rather than try to load it all into
# memory at once.  We only cache what is important to us.
for event, elem in context:
    tag   = elem.tag
    value = elem.text
    
    if value:
        value = value.encode('utf-8').strip()
    
    # Process the start of an XML tag
    if event == 'start':
        # Process "way" objects
        if tag == 'way':
            way_id = elem.get('id', 0)
        # Process "node" (nd) objetcts inside (associated with) a "way"
        elif tag == 'nd':
            node_id = elem.get('ref', 0)
            if way_id != 0:
                # Record that this node was inside this way
                nodes_per_way[way_id].append(node_id)
                ways_per_node[node_id].append(way_id)
        # Process "tag" objects that give a street name for each "way"
        elif tag == 'tag':
            if way_id != 0 and elem.get('k', '?') == 'name':
                way_name = elem.get('v', '?')
                ways_by_name[way_name.upper()].append(way_id)
                ways_by_id[way_id] = way_name.upper()
                
    # At the end of an XML tag, if it was a "way" then record that we are no longer
    # in the middle of reading a "way"
    if event == 'end' and tag == 'way':
        way_id = 0

    elem.clear()

log_finished('Read raw OpenStreetMap data')


# Second pass to load the latitude and longitude of nodes IF AND ONLY IF
# they are involved in an intersection

log_starting('Find Lat/Lon for intersections')

context = ET.iterparse(file_path, events=("start", "end"))
context = iter(context)

way_id  = 0
node_id = 0

for event, elem in context:
    tag   = elem.tag
    value = elem.text
    
    if value:
        value = value.encode('utf-8').strip()
    
    # Process the start of an XML tag
    if event == 'start':
        # Find the latitude/longitude for each "node" by its oms_id
        if tag == 'node':
            node_id = elem.get('id', 0)
            lat     = elem.get('lat', 0)
            lon     = elem.get('lon', 0)
            
            # If and only if this "node" had more than one "way" associated with it,
            # it is part of an "intersection" and therefore we record its latitude
            # and longitude in memory, by its oms_id
            if len(ways_per_node[node_id]) > 1:
                node_lat[node_id] = lat
                node_lon[node_id] = lon

    elem.clear()

log_finished('Find Lat/Lon for intersections')

2021-08-10 20:18:38.493604 START - Read raw OpenStreetMap data
2021-08-10 20:23:39.221890 END   - Read raw OpenStreetMap data(300.728286)
2021-08-10 20:23:39.223706 START - Find Lat/Lon for intersections
2021-08-10 20:29:12.865590 END   - Find Lat/Lon for intersections(333.641884)


Define a function to directly call the Nominatim geocode service to turn lat/lon into geocoded information, because we need the "bounding box" part of the data, which is not returned by the Nominatim API.

In [59]:
cached_nominatim_search = {}

def nominatim_search(street, city):
    # Check local cache first, used cached results if available
    key = street + ' - ' + city
    
    if key in cached_nominatim_search:
        return cached_nominatim_search[key]
    
    # api-endpoint
    URL = "http://geo.local/nominatim/search"
    
    params = {
        'street': street,
        'city': city
    }
        
    # sending get request and saving the response as response object
    r = requests.get(url = URL, params = params)
    
    # Cache results
    cached_nominatim_search[key] = r.json()
    
    # extracting data in json format
    return r.json()

Define a function to determine whether two bounding boxes overlap.  Include a "margin" to account for bounding boxes that almost overlap but are just off by a small margin, the size of an intersection or so.

In [60]:
def is_overlapping(box1, box2, margin=0.05):
    # Add margin to box1
    box1_margin = []
    box1_margin.append(box1[0] - margin)
    box1_margin.append(box1[1] + margin)
    box1_margin.append(box1[2] - margin)
    box1_margin.append(box1[3] + margin)

    #print('Margin: ' + str(box1_margin))
    
    # Check if latitude or longitude overlaps
    lat_overlap = False
    lon_overlap = False
    
    if box1_margin[0] <= box2[0] <= box1_margin[1]:
        lat_overlap = True
    if box1_margin[0] <= box2[1] <= box1_margin[1]:
        lat_overlap = True
    if box1_margin[2] <= box2[2] <= box1_margin[3]:
        lon_overlap = True
    if box1_margin[2] <= box2[3] <= box1_margin[3]:
        lon_overlap = True
        
    return (lat_overlap and lon_overlap)

Given a [local_street] x [town], find all other roads that intersect with it.

Some roads like "Main Street" might be very common.  To avoid false-positives, we
check the bounding boxes for both the original street and the candidate street that
appears to intersect based on the [local_street] name alone.  If the bounding boxes
overlap, or are close, then we are comfortable that it's not a duplicate street name
from another area.

In [153]:
def lookup_bounding_box(objectid):
    row = pbn[pbn['objectid']==objectid]
    
    try:
        gs = row['geometry']
        return [gs.bounds['miny'][0], gs.bounds['maxy'][0], gs.bounds['minx'][0], gs.bounds['maxx'][0]]
    except Exception:
        return None
    
def inside_bounding_box(node_id, bounding_box, margin=0.05):
    # Add margin to box1
    box_margin = []
    box_margin.append(bounding_box[0] - margin)
    box_margin.append(bounding_box[1] + margin)
    box_margin.append(bounding_box[2] - margin)
    box_margin.append(bounding_box[3] + margin)
    
    if node_id not in node_lat:
        return False
    if node_id not in node_lon:
        return False
    if not (box_margin[0] <= float(node_lat[node_id]) <= box_margin[1]):
        return False
    if not (box_margin[2] <= float(node_lon[node_id]) <= box_margin[3]):
        return False
    return True

In [155]:
def find_intersections(street, town, suburb, city, objectid=0, debug=0):
    # Find bounding_box of original street
    
    # First try using information loaded from directly form PBN coordinates
    original_bounding_box = lookup_bounding_box(objectid)
    
    if original_bounding_box is None:
        # Use most specific first town->suburb->city
        # E.g. Aberdeen Road Fyansford might actually be found in Geelong
        original_street = nominatim_search(str(street).upper(), str(town).upper())
    
        if (len(original_street) < 1):
            original_street = nominatim_search(str(street).upper(), str(suburb).upper())
        
            if (len(original_street) < 1):
                original_street = nominatim_search(str(street).upper(), str(city).upper())
            
                if (len(original_street) < 1):
                    return []
    
        original_bounding_box_str = original_street[0]['boundingbox']
        original_bounding_box = [float(i) for i in original_bounding_box_str]
    
    if (debug > 2):
        print('Original:  ' + street.upper() + " = " + str(original_bounding_box))
    
    # Find way_id list for the name
    way_ids = ways_by_name[street.upper()]
    
    intersection_dict = {}
    
    # Find every matching street name (possibly in another suburb!)   
    loop_counter = 0
    
    for way_id in way_ids:
        # Find every node associated with that street name (including other suburbs)            
        for node_id in nodes_per_way[way_id]:
            # Check that this street is in the same area (probably the same suburb)
            if inside_bounding_box(node_id, original_bounding_box):
                # Find every other street the node is associated with                
                for way_id2 in ways_per_node[node_id]:
                    # Ignore any ways that were clipped from the map
                    if way_id2 in ways_by_id:
                        # Find the street name for the other potential intersecting street
                        intersection_name = ways_by_id[way_id2]
                        if intersection_name.upper() != street.upper():                        
                            # Check Nominatim service to see if boundary boxes roughly overlap
                        
                            pot_streets = nominatim_search(intersection_name.upper(), city.upper())
                            for pot_street in pot_streets:
                                pot_street_bounding_box_str = pot_street['boundingbox']
                                pot_street_bounding_box = [float(i) for i in pot_street_bounding_box_str]
                            
                                if (debug > 2):
                                    print('Potential: ' + intersection_name + ' = ' + str(pot_street_bounding_box))
                            
                                if is_overlapping(original_bounding_box, pot_street_bounding_box):
                                    if (debug > 2):
                                        print('Matched: ' + intersection_name + ' = ' + str(pot_street_bounding_box) + ' vs ' + str(original_bounding_box))
                                    intersection_dict[intersection_name] = [float(node_lat[node_id]), float(node_lon[node_id])]
                                    if (debug > 2):
                                        print('Trace: ' + str(node_id) + ' ' + intersection_name + ' => ' + str(intersection_dict[intersection_name]))
                                loop_counter = loop_counter + 1
    
    if (debug > 0):
        print(str(objectid) + ' ' + street + ' Matching Ways: ' + str(len(way_ids)) + ' loops: ' + str(loop_counter))
    
    # Transform dictionary into list of key/value pairs
    intersection_list = []
    
    for intersection_name, intersection_details in intersection_dict.items():
        intersection_entry = [intersection_name, intersection_details[0], intersection_details[1]]
        intersection_list.append(intersection_entry)
        
    return intersection_list

Demonstration:  Find intersections for one street

In [92]:
find_intersections('ABBEY WALK', 'VERMONT', 'VERMONT', 'VERMONT', 0, debug=0)

[['HEATHERDALE ROAD', -37.8388277, 145.2132833],
 ['TULKARA GROVE', -37.8391825, 145.2130727],
 ['CULBARA DRIVE', -37.8396951, 145.2122922],
 ['THE MEWS', -37.8407696, 145.2118413]]

Get Google Maps API connection

$6.70 AUD per 1000 requests
Refs:

https://medium.com/future-vision/google-maps-in-python-part-2-393f96196eaf
https://developers.google.com/maps/documentation/geocoding/get-api-key
https://github.com/pbugnion/gmaps/issues/79

In [64]:
with open('apikey.txt') as f:
    api_key = f.readline()
    f.close

gmaps.configure(api_key=api_key)

def mid_coords(lat1, lat2, lon1, lon2):
    return ((lat1 + lat2)/2, (lon1 + lon2)/2)

In [65]:
gmaps.figure(center=mid_coords(-37.8423446, -37.8388277, 145.2113893, 145.2132833), zoom_level=14)

Figure(layout=FigureLayout(height='420px'))

## Find distinct [local_street] x [town] combinations in the data

In [66]:
# Filter to only include 'Existing' routes
pbn_likely1 = pbn_explode[pbn_explode['status']=='Existing']

# Filter to only include 'On Road' routes
pbn_likely2 = pbn_likely1[pbn_likely1['type']=='On Road']

# Filter to exclude 'n/a' streets
pbn_likely = pbn_likely2[pbn_likely2['local_street'] != 'n/a']

# Get distinct combinations
pbn_distinct1 = pbn_likely.groupby(['objectid', 'local_street', 'town', 'suburb', 'city']).size().reset_index().rename(columns={0:'count'})

# Get first row per objectid
pbn_distinct = pbn_distinct1.groupby('objectid').first().reset_index()

In [207]:
# Show a sample
pbn_distinct.tail(3)

Find all intersections for each of the 2,125 roads

This is sometimes SLOW for a street.  E.g. there are 400x "ALBERT STREET" in Victoria,
therefore we have to check each occurrence of "ALBERT STREET" in the dataset to EVERY
street that intersects ANY "ALBERT STREET" in Victoria.  And we need to call the Nominatim
web service to get the bounding box for each street involved.

In [157]:
log_starting('Find all intersections')

tqdm.pandas()

pbn_sample = pbn_distinct[:6].copy()
pbn_sample.loc[:, 'intersections'] = pbn_sample.progress_apply(lambda x: find_intersections(x['local_street'], x['town'], x['suburb'], x['city'], objectid=x['objectid'], debug=1), axis=1)


log_finished('Find all intersections')

pbn_sample.head()
print(pbn_sample['intersections'][0])

2021-08-12 13:36:13.991911 START - Find all intersections


  0%|          | 0/6 [00:00<?, ?it/s]

616 HYDE STREET Matching Ways: 38 loops: 407
767 HYDE STREET Matching Ways: 38 loops: 407
775 HYDE STREET Matching Ways: 38 loops: 407
783 HYDE STREET Matching Ways: 38 loops: 407
931 HYDE STREET Matching Ways: 38 loops: 407
965 HIGH STREET Matching Ways: 1227 loops: 2044
2021-08-12 13:36:14.766731 END   - Find all intersections(0.77482)
[['FRANCIS STREET', -37.8218002, 144.8958597], ['BUNBURY STREET', -37.802393, 144.902391], ['SOMERVILLE ROAD', -37.8143847, 144.8971759], ['NICHOLSON STREET', -37.8130359, 144.8974037], ['NAPIER STREET', -37.8043721, 144.9013761], ['LAWSON PLACE', -37.8053109, 144.9009415], ['PARKER STREET', -37.8062754, 144.9004949], ['BRISTOW STREET', -37.8069312, 144.9001913], ['PRINCESS STREET', -37.8107707, 144.898412], ['HARRIS STREET', -37.8112324, 144.8981981], ['BERRY STREET', -37.8118006, 144.8979348], ['LYONS STREET', -37.8084568, 144.8994861], ['GLOBE STREET', -37.8223414, 144.8957649], ['VOCKLER STREET', -37.8228866, 144.8956667], ['STONY CREEK WALK', -37.

In [165]:
pbn_sample.head()

Unnamed: 0,objectid,local_street,town,suburb,city,count,intersections
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[[FRANCIS STREET, -37.8218002, 144.8958597], [BUNBURY STREET, -37.802393, 144.902391], [SOMERVIL..."
1,767,HYDE STREET,Seddon,Footscray,Melbourne,1,"[[FRANCIS STREET, -37.8218002, 144.8958597], [BUNBURY STREET, -37.802393, 144.902391], [SOMERVIL..."
2,775,HYDE STREET,Seddon,Footscray,Melbourne,2,"[[FRANCIS STREET, -37.8218002, 144.8958597], [BUNBURY STREET, -37.802393, 144.902391], [SOMERVIL..."
3,783,HYDE STREET,Seddon,Footscray,Melbourne,1,"[[FRANCIS STREET, -37.8218002, 144.8958597], [BUNBURY STREET, -37.802393, 144.902391], [SOMERVIL..."
4,931,HYDE STREET,Footscray,Footscray,Melbourne,1,"[[FRANCIS STREET, -37.8218002, 144.8958597], [BUNBURY STREET, -37.802393, 144.902391], [SOMERVIL..."


In [166]:
log_starting('Find all intersections')

tqdm.pandas()

pbn_full = pbn_distinct.copy()
pbn_full['intersections'] = pbn_full.progress_apply(lambda x: find_intersections(x['local_street'], x['town'], x['suburb'], x['city'], objectid=x['objectid']), axis=1)

log_finished('Find all intersections')

2021-08-12 13:46:36.884861 START - Find all intersections


  0%|          | 0/16269 [00:00<?, ?it/s]

2021-08-12 13:50:55.425615 END   - Find all intersections(258.540754)


In [167]:
pbn_full.head(5)



Unnamed: 0,objectid,local_street,town,suburb,city,count,intersections
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[[FRANCIS STREET, -37.8218002, 144.8958597], [BUNBURY STREET, -37.802393, 144.902391], [SOMERVIL..."
1,767,HYDE STREET,Seddon,Footscray,Melbourne,1,"[[FRANCIS STREET, -37.8218002, 144.8958597], [BUNBURY STREET, -37.802393, 144.902391], [SOMERVIL..."
2,775,HYDE STREET,Seddon,Footscray,Melbourne,2,"[[FRANCIS STREET, -37.8218002, 144.8958597], [BUNBURY STREET, -37.802393, 144.902391], [SOMERVIL..."
3,783,HYDE STREET,Seddon,Footscray,Melbourne,1,"[[FRANCIS STREET, -37.8218002, 144.8958597], [BUNBURY STREET, -37.802393, 144.902391], [SOMERVIL..."
4,931,HYDE STREET,Footscray,Footscray,Melbourne,1,"[[FRANCIS STREET, -37.8218002, 144.8958597], [BUNBURY STREET, -37.802393, 144.902391], [SOMERVIL..."


In [168]:
log_starting('Save geocoded PBN dataset')

pbn_full.to_csv('pbn_intersections.csv')

log_finished('Save geocoded PBN dataset')

2021-08-12 13:53:10.978528 START - Save geocoded PBN dataset
2021-08-12 13:53:12.033512 END   - Save geocoded PBN dataset(1.054984)


In [169]:
find_intersections('ABERDEEN STREET', 'Fyansford', 'Geelong', 'Geelong', debug=0)

[['GARLICK AVENUE', -38.1441314, 144.3252059],
 ['DEVIATION ROAD', -38.1440195, 144.3242695],
 ['GAYLARD AVENUE', -38.1440195, 144.3242695],
 ['SHELLEY AVENUE', -38.1444743, 144.3280753],
 ['LANCASTER AVENUE', -38.1443603, 144.3271211],
 ['WARWICK STREET', -38.1442465, 144.3261692],
 ['MCNICOL STREET', -38.147204, 144.351441],
 ['PAKINGTON STREET', -38.1467378, 144.3471356],
 ['COQUETTE STREET', -38.1463881, 144.3441454],
 ['ST JAMES STREET', -38.1460828, 144.3415348],
 ['CUMBERLAND STREET', -38.1460585, 144.341327],
 ['BENDIGO STREET', -38.1459284, 144.3402153],
 ['EUREKA STREET', -38.1458362, 144.3394268],
 ['GEORGE STREET', -38.1457103, 144.3383503],
 ['SHANNON AVENUE', -38.1453737, 144.3354723],
 ['CHURCHILL AVENUE', -38.1448699, 144.3313519],
 ['SILK AVENUE', -38.1449884, 144.3323193],
 ['COLLEGE COURT', -38.1448131, 144.3308891],
 ['MINERVA ROAD', -38.1445919, 144.3290786],
 ['EMERALD STREET', -38.1471373, 144.3505146],
 ['CORONATION STREET', -38.1470641, 144.3498958],
 ['LA TROB

In [170]:
find_intersections('MILLBANK DRIVE', 'MOUNT ELIZA', 'MOUNT ELIZA', 'MOUNT ELIZA', debug=0)

[['BELLBIRD ROAD', -38.1990811, 145.1044224],
 ['MATHER ROAD', -38.1959845, 145.1040959],
 ['RYLSTON COURT', -38.1940608, 145.1046181],
 ['ESME COURT', -38.192864, 145.1043193],
 ['WALKERS ROAD', -38.1894195, 145.1037342]]

In [184]:
def find_bearing_and_distance(objectid, intersection):
    if type(intersection) != list:
        return None
    
    lat = intersection[1]
    lon = intersection[2]
    
    #print('int type: ' + str(type(intersection)) + ' value: ' + str(intersection))
    #print('lat type: ' + str(type(lat)) + ' value: ' + str(lat))
    #print('lon type: ' + str(type(lon)) + ' value: ' + str(lat))
    
    row = pbn[pbn['objectid']==objectid]
    
    gs = row['geometry']
    
    # Flatten into list of coordinates
    coords_list = []
    
    for g in gs:
        if type(g) is LineString:
            for xy in g.coords:
                coords_list.append((xy[1], xy[0]))
        elif type(g) is MultiLineString:
            for ls in g:
                for xy in ls.coords:
                    coords_list.append((xy[1], xy[0]))
    
    # Find the index of the closest point, and the distance in metres
    coords_this  = (lat, lon)
    
    min_distance = 20000000
    min_i        = -1
    
    for i in range(len(coords_list)):
        distance = geopy.distance.distance(coords_this, coords_list[i]).m
        
        if distance < min_distance:
            min_distance = distance
            min_i        = i

    # Get bearing to previous and next point (if applicable)
    if i > 0:
        coords_prev = coords_list[i-1]
        bearing_prev = Geodesic.WGS84.Inverse(coords_prev[0], coords_prev[1], lat, lon)['azi1']
        if bearing_prev < 0:
            bearing_prev = bearing_prev + 360
    else:
        bearing_prev = None
        
    if i < len(coords_list)-1:
        coords_next = coords_list[i+1]
        bearing_next = Geodesic.WGS84.Inverse(lat, lon, coords_next[0], coords_next[1])['azi1']
        if bearing_next < 0:
            bearing_next = bearing_next + 360
    else:
        bearing_next = None
        
    if   bearing_prev is None and bearing_next is not None:
        bearing = round(bearing_next)
    elif bearing_next is None and bearing_prev is not None:
        bearing = round(bearing_prev)
    else:
        bearing = round((bearing_prev + bearing_next) / 2)
    
    # Return a list:
    # 0: Bearing
    # 1: Distance to closest point
    # 2: Coordinates (lat, lon) of closest point
    
    return [bearing, min_distance, coords_list[i][0], coords_list[i][1]]


find_bearing_and_distance(1, [0, -38.1990811, 145.1044225])

[210, 26366.064370313623, -37.99340425354793, 145.25487585849666]

In [172]:
pbn_sample2 = pbn_sample.explode('intersections')

In [173]:
pbn_sample2.head()

Unnamed: 0,objectid,local_street,town,suburb,city,count,intersections
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[FRANCIS STREET, -37.8218002, 144.8958597]"
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[BUNBURY STREET, -37.802393, 144.902391]"
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[SOMERVILLE ROAD, -37.8143847, 144.8971759]"
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[NICHOLSON STREET, -37.8130359, 144.8974037]"
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[NAPIER STREET, -37.8043721, 144.9013761]"


In [174]:
log_starting('Find bearings')

tqdm.pandas()

pbn_sample3 = pbn_sample2.copy()
pbn_sample3.loc[:, 'bearings'] = pbn_sample3.progress_apply(lambda x: \
    find_bearing_and_distance(x['objectid'], x['intersections']), axis=1)

log_finished('Find bearings')

pbn_sample3.head()
#print(pbn_sample3['bearings'][0])


2021-08-12 13:53:56.637425 START - Find bearings


  0%|          | 0/266 [00:00<?, ?it/s]

2021-08-12 13:54:00.984927 END   - Find bearings(4.347502)


Unnamed: 0,objectid,local_street,town,suburb,city,count,intersections,bearings
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[FRANCIS STREET, -37.8218002, 144.8958597]","[193, 1520.9769161975325, -37.80841519742836, 144.89956141926396]"
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[BUNBURY STREET, -37.802393, 144.902391]","[21, 541.1451130164029, -37.80841519742836, 144.89956141926396]"
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[SOMERVILLE ROAD, -37.8143847, 144.8971759]","[198, 695.0741707977384, -37.80841519742836, 144.89956141926396]"
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[NICHOLSON STREET, -37.8130359, 144.8974037]","[200, 546.9297445837417, -37.80841519742836, 144.89956141926396]"
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[NAPIER STREET, -37.8043721, 144.9013761]","[19, 304.14867789262314, -37.80841519742836, 144.89956141926396]"


In [175]:
pd.set_option('display.max_colwidth', 100)
print(pbn_sample3['bearings'])

0    [193, 1520.9769161975325, -37.80841519742836, 144.89956141926396]
0      [21, 541.1451130164029, -37.80841519742836, 144.89956141926396]
0     [198, 695.0741707977384, -37.80841519742836, 144.89956141926396]
0     [200, 546.9297445837417, -37.80841519742836, 144.89956141926396]
0     [19, 304.14867789262314, -37.80841519742836, 144.89956141926396]
                                   ...                                
5       [7, 3925.3388822316383, -37.77161492986355, 144.9985490516961]
5        [7, 3784.882748296061, -37.77161492986355, 144.9985490516961]
5       [7, 3755.3374275927204, -37.77161492986355, 144.9985490516961]
5     [189, 1179.9334016528824, -37.77161492986355, 144.9985490516961]
5       [190, 1451.79207173655, -37.77161492986355, 144.9985490516961]
Name: bearings, Length: 266, dtype: object


In [176]:
find_intersections('HYDE STREET', 'Seddon', 'Footscray', 'Melbourne', 616, debug=0)

[['FRANCIS STREET', -37.8218002, 144.8958597],
 ['BUNBURY STREET', -37.802393, 144.902391],
 ['SOMERVILLE ROAD', -37.8143847, 144.8971759],
 ['NICHOLSON STREET', -37.8130359, 144.8974037],
 ['NAPIER STREET', -37.8043721, 144.9013761],
 ['LAWSON PLACE', -37.8053109, 144.9009415],
 ['PARKER STREET', -37.8062754, 144.9004949],
 ['BRISTOW STREET', -37.8069312, 144.9001913],
 ['PRINCESS STREET', -37.8107707, 144.898412],
 ['HARRIS STREET', -37.8112324, 144.8981981],
 ['BERRY STREET', -37.8118006, 144.8979348],
 ['LYONS STREET', -37.8084568, 144.8994861],
 ['GLOBE STREET', -37.8223414, 144.8957649],
 ['VOCKLER STREET', -37.8228866, 144.8956667],
 ['STONY CREEK WALK', -37.8247498, 144.8953386],
 ['HYDE STREET OFFRAMP', -37.8264485, 144.8950395],
 ['DOUGLAS PARADE', -37.826817, 144.8949746],
 ['KNOX STREET', -37.8152418, 144.8970302],
 ['HALL STREET', -37.8159693, 144.8969072],
 ['LENNOX STREET', -37.8162836, 144.8968438],
 ['SCHILD STREET', -37.8172032, 144.8966674],
 ['LEEK STREET', -37.8173

In [177]:
pbn_full2 = pbn_full.explode('intersections')

In [185]:
log_starting('Find bearings')

tqdm.pandas()

pbn_full3 = pbn_full2.copy()
pbn_full3.loc[:, 'bearings'] = pbn_full3.progress_apply(lambda x: \
    find_bearing_and_distance(x['objectid'], x['intersections']), axis=1)

log_finished('Find bearings')

pbn_full3.head()

2021-08-12 14:02:11.281767 START - Find bearings


  0%|          | 0/387750 [00:00<?, ?it/s]

2021-08-12 15:32:55.580015 END   - Find bearings(5444.298248)


Unnamed: 0,objectid,local_street,town,suburb,city,count,intersections,bearings
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[FRANCIS STREET, -37.8218002, 144.8958597]","[193, 1520.9769161975325, -37.80841519742836, 144.89956141926396]"
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[BUNBURY STREET, -37.802393, 144.902391]","[21, 541.1451130164029, -37.80841519742836, 144.89956141926396]"
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[SOMERVILLE ROAD, -37.8143847, 144.8971759]","[198, 695.0741707977384, -37.80841519742836, 144.89956141926396]"
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[NICHOLSON STREET, -37.8130359, 144.8974037]","[200, 546.9297445837417, -37.80841519742836, 144.89956141926396]"
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,"[NAPIER STREET, -37.8043721, 144.9013761]","[19, 304.14867789262314, -37.80841519742836, 144.89956141926396]"


In [187]:
pbn_full3[pbn_full3['town'] == 'Mount Eliza']

Unnamed: 0,objectid,local_street,town,suburb,city,count,intersections,bearings
323,3224,NEPEAN HIGHWAY,Mount Eliza,Mount Eliza,Melbourne,2,"[CRAIGIE ROAD, -38.2559272, 145.0358127]","[210, 11453.897551229755, -38.16681534561076, 145.10175280069583]"
323,3224,NEPEAN HIGHWAY,Mount Eliza,Mount Eliza,Melbourne,2,"[BENTONS ROAD, -38.2446662, 145.0375275]","[213, 10311.09677009861, -38.16681534561076, 145.10175280069583]"
323,3224,NEPEAN HIGHWAY,Mount Eliza,Mount Eliza,Melbourne,2,"[DAVA DRIVE, -38.2397081, 145.0360947]","[215, 9926.717891585355, -38.16681534561076, 145.10175280069583]"
323,3224,NEPEAN HIGHWAY,Mount Eliza,Mount Eliza,Melbourne,2,"[MORNINGTON - TYABB ROAD, -38.229573, 145.0449643]","[215, 8559.882672139647, -38.16681534561076, 145.10175280069583]"
323,3224,NEPEAN HIGHWAY,Mount Eliza,Mount Eliza,Melbourne,2,"[ITHACA ROAD, -38.1640515, 145.1022566]","[10, 296.7850220513333, -38.16681534561076, 145.10175280069583]"
...,...,...,...,...,...,...,...,...
475,3472,NEPEAN HIGHWAY,Mount Eliza,Mount Eliza,Melbourne,2,"[ALLISON STREET, -38.2258346, 145.0498248]","[225, 2256.213937800071, -38.20896255380091, 145.07119332220725]"
475,3472,NEPEAN HIGHWAY,Mount Eliza,Mount Eliza,Melbourne,2,"[PENTECOST ROAD, -38.2267859, 145.0485423]","[225, 2410.283590244056, -38.20896255380091, 145.07119332220725]"
475,3472,NEPEAN HIGHWAY,Mount Eliza,Mount Eliza,Melbourne,2,"[SPRAY STREET, -38.2283828, 145.0461103]","[226, 2686.3225964013773, -38.20896255380091, 145.07119332220725]"
475,3472,NEPEAN HIGHWAY,Mount Eliza,Mount Eliza,Melbourne,2,"[WOORALLA DRIVE, -38.1951263, 145.0912533]","[48, 2333.681982929821, -38.20896255380091, 145.07119332220725]"


In [203]:
# Extract or derive geo fields from geocode_list

def get_list_field(intersection, index):
    if not (type(intersection)==list):
        return None
    return intersection[index]
            
log_starting('Extract intersection and bearing fields')

tqdm.pandas()

pbn_full4 = pbn_full3.copy()
pbn_full4.loc[:, 'intersection_street'] = pbn_full4.progress_apply(lambda x: get_list_field(x['intersections'], 0), axis=1)
pbn_full4.loc[:, 'intersection_lat']    = pbn_full4.progress_apply(lambda x: get_list_field(x['intersections'], 1), axis=1)
pbn_full4.loc[:, 'intersection_lon']    = pbn_full4.progress_apply(lambda x: get_list_field(x['intersections'], 2), axis=1)
pbn_full4.loc[:, 'bearing']             = pbn_full4.progress_apply(lambda x: get_list_field(x['bearings'], 0), axis=1)
pbn_full4.loc[:, 'bearing_lat']         = pbn_full4.progress_apply(lambda x: get_list_field(x['bearings'], 1), axis=1)
pbn_full4.loc[:, 'bearing_lon']         = pbn_full4.progress_apply(lambda x: get_list_field(x['bearings'], 2), axis=1)
pbn_full4.drop(labels=['intersections', 'bearings'], axis=1, inplace=True)

log_finished('Extract intersection and bearing fields')
pbn_full4.head()

2021-08-12 16:16:04.284407 START - Extract intersection and bearing fields


  0%|          | 0/387750 [00:00<?, ?it/s]

  0%|          | 0/387750 [00:00<?, ?it/s]

  0%|          | 0/387750 [00:00<?, ?it/s]

  0%|          | 0/387750 [00:00<?, ?it/s]

  0%|          | 0/387750 [00:00<?, ?it/s]

  0%|          | 0/387750 [00:00<?, ?it/s]

2021-08-12 16:16:19.066910 END   - Extract intersection and bearing fields(14.782503)


Unnamed: 0,objectid,local_street,town,suburb,city,count,intersection_street,intersection_lat,intersection_lon,bearing,bearing_lat,bearing_lon
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,FRANCIS STREET,-37.8218,144.89586,193.0,1520.976916,-37.808415
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,BUNBURY STREET,-37.802393,144.902391,21.0,541.145113,-37.808415
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,SOMERVILLE ROAD,-37.814385,144.897176,198.0,695.074171,-37.808415
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,NICHOLSON STREET,-37.813036,144.897404,200.0,546.929745,-37.808415
0,616,HYDE STREET,Seddon,Footscray,Melbourne,1,NAPIER STREET,-37.804372,144.901376,19.0,304.148678,-37.808415


In [205]:
log_starting('Save bearings PBN dataset')

pbn_full4.to_csv('pbn_bearings.csv')

log_finished('Save bearings PBN dataset')

2021-08-12 16:19:29.784473 START - Save bearings PBN dataset
2021-08-12 16:19:33.195692 END   - Save bearings PBN dataset(3.411219)
