# Calculate TAZ centroids' nearest network nodes

Given an input dataset of zone-to-zone travel demand data, convert the origins and destinations to network nodes nearest to each zone's centroid.

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import osmnx as ox
import time

start_time = time.time()

In [2]:
output_file = 'data/pm_peak_nodes.csv' #where to save the final output
taz_travel_demand_file = 'data/pm_peak.csv' #travel demand from zone to zone
taz_shapefile = 'Travel_Analysis_Zones/Travel_Analysis_Zones.shp' #taz geometries
network_file = 'data/bayarea_tertiary_simplified.graphml' #the network we are modeling

## Load the input data

In [3]:
# the network that we are modeling
G = ox.load_graphml(network_file, folder='.')

In [4]:
# the TAZ shapefile
gdf = gpd.read_file(taz_shapefile)
gdf.shape

(1454, 6)

In [5]:
# the travel demand data from zone to zone
df = pd.read_csv(taz_travel_demand_file)
df.shape

(86563, 3)

## Project the shapefile to the same CRS as the network, then calculate TAZ centroids

In [6]:
# what are the CRSs of the TAZ shapefile and the network?
original_crs = gdf.crs
print(original_crs)

target_crs = G.graph['crs']
print(target_crs)

{'init': 'epsg:26910'}
{'init': 'epsg:4326'}


In [7]:
# project the TAZ geodataframe to the network's CRS
gdf = gdf.to_crs(target_crs)

In [8]:
# calculate TAZ centroids
gdf['centroid'] = gdf.centroid
gdf['centroid'].head()

0     POINT (-121.857633666841 37.28926565704089)
1     POINT (-121.853324889967 37.27757719988241)
2    POINT (-121.8155657776301 37.29462130901247)
3    POINT (-121.8156334258242 37.28067749895624)
4     POINT (-121.8326218122455 37.3119281358813)
Name: centroid, dtype: object

## Find nearest network node to each TAZ centroid

In [9]:
# extract lat, lng centroid tuples
points = gdf['centroid'].map(lambda p: (p.y, p.x))

In [10]:
# wrapper function for OSMnx to get network node nearest to some point
def nearest_node(point):
    return ox.get_nearest_node(G, point, method='greatcircle', return_dist=True)

In [11]:
%%time
nodes_distances = points.map(nearest_node)

Wall time: 1min 35s


In [12]:
# unzip the nearest nodes and their distances to the centroid into individual columns
nodes, distances = zip(*nodes_distances)
gdf['nearest_node'] = nodes
gdf['nearest_node_distance'] = distances

In [13]:
# display subset of results
gdf[['TAZ1454', 'centroid', 'nearest_node', 'nearest_node_distance']].head()

Unnamed: 0,TAZ1454,centroid,nearest_node,nearest_node_distance
0,566,POINT (-121.857633666841 37.28926565704089),1105792616,205.079915
1,565,POINT (-121.853324889967 37.27757719988241),1099259828,64.05103
2,573,POINT (-121.8155657776301 37.29462130901247),1097706630,277.684824
3,571,POINT (-121.8156334258242 37.28067749895624),65475266,205.125176
4,576,POINT (-121.8326218122455 37.3119281358813),65555564,475.644355


In [14]:
# create a dict keyed by zone ID with values of node ID nearest to zone's centroid
zone_nodes = gdf[['TAZ1454', 'nearest_node']].set_index('TAZ1454')['nearest_node'].to_dict()

## Add the network nodes to the zone-to-zone travel demand data

In [15]:
# make sure we're working with integers
df['DTAZ'] = df['DTAZ'].astype(np.int64)
df['OTAZ'] = df['OTAZ'].astype(np.int64)
df['trips'] = df['trips'].astype(np.int64)
df.head()

Unnamed: 0,OTAZ,DTAZ,trips
0,1,7,1
1,1,8,2
2,1,9,3
3,1,11,1
4,1,15,2


In [16]:
# given a zone, return the node nearest to its centroid
def lookup_nearest_node(zone):
    if zone in zone_nodes:
        return zone_nodes[zone]
    else:
        print('ERROR: zone "{}" not found in dict'.format(zone))

In [17]:
# get the origin nodes for all the origin zones
df['orig'] = df['OTAZ'].map(lookup_nearest_node)

In [18]:
# get the destination nodes for all the destination zones
df['dest'] = df['DTAZ'].map(lookup_nearest_node)

In [19]:
# clean up the data into the format LBNL desires
df['dest'] = df['dest'].astype(np.int64)
df['orig'] = df['orig'].astype(np.int64)
df = df.drop(labels=['OTAZ', 'DTAZ'], axis=1)
df = df.reindex(labels=['orig', 'dest', 'trips'], axis=1)

In [20]:
# display a subset of the final data
df.head()

Unnamed: 0,orig,dest,trips
0,65295278,65334120,1
1,65295278,65343958,2
2,65295278,65352337,3
3,65295278,65325032,1
4,65295278,65309522,2


In [21]:
# save out to CSV
df.to_csv(output_file, index=False, encoding='utf-8')

In [22]:
# process is all done, show elapsed time
elapsed_time = time.time() - start_time
elapsed_time

104.20572972297668