# UrbanAccess transit accessibility calculation for auto ownership model

This notebook runs only under Python 2.7. It generates a table of transit accessibility indicators for Bay Area parcels, to be used in the auto ownership model.

In [1]:
import pandas as pd
import pandana as pdna
import time

import urbanaccess as ua
from urbanaccess.config import settings
from urbanaccess.gtfsfeeds import feeds
from urbanaccess import gtfsfeeds
from urbanaccess.gtfs.gtfsfeeds_dataframe import gtfsfeeds_dfs
from urbanaccess.network import ua_network, load_network

%matplotlib inline

In [3]:
# Operator names looked up in the GTFS feed search. A 'contains' match can
# register more than one feed per name (the download log lists several
# Caltrain feeds).
agencies = [
    'Bay Area Rapid Transit',
    'AC transit',
    'Caltrain',
    'County Connection',
    'Fairfield and Suisun Transit',
    'Golden Gate Transit',
    'SFMTA',
    'SamTrans',
    'Soltrans',
    'Marin Transit',
    'Sonoma County Transit',
]

# add_feed=True registers every match for download. With search_field=None
# the logged output shows the name, url, dataexchange_id and feed_baseurl
# columns all being searched.
for agency in agencies:
    gtfsfeeds.search(search_text=agency, search_field=None,
                     match='contains', add_feed=True)

Note: Your use of a GTFS feed is governed by each GTFS feed author license terms. It is suggested you read the respective license terms for the appropriate use of a GTFS feed.
Found 1 records that matched ['Bay Area Rapid Transit'] inside ['name', 'url', 'dataexchange_id', 'feed_baseurl'] columns:
Added 1 feeds to gtfs_feeds: {'Bay Area Rapid Transit': 'http://www.gtfs-data-exchange.com/agency/bay-area-rapid-transit/latest.zip'}
Added 1 records to gtfs_feed list:
Note: Your use of a GTFS feed is governed by each GTFS feed author license terms. It is suggested you read the respective license terms for the appropriate use of a GTFS feed.
Found 1 records that matched ['AC transit'] inside ['name', 'url', 'dataexchange_id', 'feed_baseurl'] columns:
Added 1 feeds to gtfs_feeds: {'AC Transit': 'http://www.gtfs-data-exchange.com/agency/ac-transit/latest.zip'}
Added 1 records to gtfs_feed list:
Note: Your use of a GTFS feed is governed by each GTFS feed author license terms. It is suggested yo

In [4]:
# Feeds requiring manual download: each is registered by its direct
# download URL instead of through the name search.
manual_feeds = {
    'Petaluma Transit': 'https://transitfeeds.com/p/petaluma-transit-gtfs/675/latest/download',
    'Emery Go-Round': 'https://transitfeeds.com/p/emery-go-round/769/latest/download',
    'VTA': 'https://transitfeeds.com/p/vta/45/latest/download',
    'WestCAT': 'https://transitfeeds.com/p/westcat/682/latest/download',
}

feeds.add_feed(add_dict=manual_feeds)

Added 4 feeds to gtfs_feeds: {'VTA': 'https://transitfeeds.com/p/vta/45/latest/download', 'Emery Go-Round': 'https://transitfeeds.com/p/emery-go-round/769/latest/download', 'WestCAT': 'https://transitfeeds.com/p/westcat/682/latest/download', 'Petaluma Transit': 'https://transitfeeds.com/p/petaluma-transit-gtfs/675/latest/download'}


In [5]:
# Fetch every feed registered so far (searched and manually added). The
# logged output reports the zips being written to data/gtfsfeed_zips.
gtfsfeeds.download()

17 GTFS feeds will be downloaded here: data/gtfsfeed_zips
VTA GTFS feed downloaded successfully. Took 1.07 seconds for 4,109,295.0KB
Sonoma County Transit GTFS feed downloaded successfully. Took 1.18 seconds for 728,755.0KB
Caltrain Shuttles GTFS feed downloaded successfully. Took 0.96 seconds for 68,198.0KB
Golden Gate Transit GTFS feed downloaded successfully. Took 1.40 seconds for 2,629,268.0KB
AC Transit GTFS feed downloaded successfully. Took 1.58 seconds for 5,269,792.0KB
WestCAT GTFS feed downloaded successfully. Took 0.57 seconds for 453,300.0KB
Marin Transit GTFS feed downloaded successfully. Took 1.24 seconds for 878,923.0KB
Emery Go-Round GTFS feed downloaded successfully. Took 0.68 seconds for 103,347.0KB
San Francisco Municipal Transportation Agency GTFS feed downloaded successfully. Took 1.55 seconds for 8,287,793.0KB
Caltrain Commuter Shuttle (call for pick-up) GTFS feed downloaded successfully. Took 0.82 seconds for 100,970.0KB
SolTrans GTFS feed downloaded successfully

In [8]:
# Bay Area bounding box, ordered (west lng, south lat, east lng, north lat).
bbox = (-123.024607, 36.894196, -121.207972, 38.863927)

# Loader switches, kept as named notebook variables so they are easy to tune.
validation, verbose = True, False
remove_stops_outsidebbox = True
append_definitions = True

# gtfsfeed_path=None: no explicit feed folder is handed to the loader.
# The result is the set of GTFS tables consumed by create_transit_net.
loaded_feeds = ua.gtfs.load.gtfsfeed_to_df(
    gtfsfeed_path=None,
    validation=validation,
    verbose=verbose,
    bbox=bbox,
    remove_stops_outsidebbox=remove_stops_outsidebbox,
    append_definitions=append_definitions)

GTFS text file encoding check completed. Took 0.08 seconds
GTFS text file header whitespace check completed. Took 0.67 seconds
--------------------------------
Processing GTFS feed: San Francisco Municipal Transportation Agency
The unique agency id: san_francisco_municipal_transportation_agency was generated using the name of the agency in the agency.txt file.
Unique agency id operation complete. Took 0.08 seconds
Unique GTFS feed id operation complete. Took 0.02 seconds
Removed identified stops that are outside of bounding box.
San Francisco Municipal Transportation Agency GTFS feed stops: coordinates are in northwest hemisphere. Latitude = North (90); Longitude = West (-90).
Appended route type to stops
Appended route type to stop_times
--------------------------------
--------------------------------
Processing GTFS feed: WestCAT
The unique agency id: westcat_(western_contra_costa) was generated using the name of the agency in the agency.txt file.
Unique agency id operation complete

--------------------------------
--------------------------------
Processing GTFS feed: Fairfield and Suisun Transit
The unique agency id: fairfield_and_suisun_transit was generated using the name of the agency in the agency.txt file.
Unique agency id operation complete. Took 0.01 seconds
Unique GTFS feed id operation complete. Took 0.00 seconds
No GTFS feed stops were found to be outside the bounding box coordinates
Fairfield and Suisun Transit GTFS feed stops: coordinates are in northwest hemisphere. Latitude = North (90); Longitude = West (-90).
Appended route type to stops
Appended route type to stop_times
--------------------------------
--------------------------------
Processing GTFS feed: Bay Area Rapid Transit
The unique agency id: bay_area_rapid_transit was generated using the name of the agency in the agency.txt file.
Unique agency id operation complete. Took 0.01 seconds
Unique GTFS feed id operation complete. Took 0.00 seconds
No GTFS feed stops were found to be outside th

In [9]:
# create transit network
# Builds the transit network from the loaded feeds for Monday service in the
# 07:00-10:00 window. With calendar_dates_lookup=None the logged output shows
# service_ids being extracted from the calendar table only.
ua.gtfs.network.create_transit_net(gtfsfeeds_dfs=loaded_feeds,
                                   day='monday',
                                   timerange=['07:00:00', '10:00:00'],
                                   calendar_dates_lookup=None)


Using calendar to extract service_ids to select trips.
101 service_ids were extracted from calendar
39,898 trip(s) 48.27 percent of 82,658 total trip records were found in calendar for GTFS feed(s): ['san francisco municipal transportation agency', 'westcat', 'caltrain shuttles', 'marin transit', 'samtrans', 'petaluma transit', 'caltrain commuter shuttle (call for pick-up)', 'golden gate transit', 'caltrain', 'emery go-round', 'ac transit', 'county connection', 'vta', 'soltrans', 'fairfield and suisun transit', 'bay area rapid transit', 'sonoma county transit']
NOTE: If you expected more trips to have been extracted and your GTFS feed(s) have a calendar_dates file, consider utilizing the calendar_dates_lookup parameter in order to add additional trips based on information inside of calendar_dates. This should only be done if you know the corresponding GTFS feed is using calendar_dates instead of calendar to specify service_ids. When in doubt do not use the calendar_dates_lookup paramet

<urbanaccess.network.urbanaccess_network at 0x7fe47db53d90>

In [13]:
# create OSM network
# Requests the street network inside bbox from the Overpass API and returns
# node and edge tables. With remove_lcn=True the log reports low-connectivity
# nodes being identified and removed.
# NOTE: this cell is slow — the logged run took roughly 2,100 seconds.
nodes, edges = ua.osm.load.ua_network_from_bbox(bbox=bbox,
                                                remove_lcn=True)


Requesting network data within bounding box from Overpass API in 20 request(s)
Posting to http://www.overpass-api.de/api/interpreter with timeout=180, "{'data': '[out:json][timeout:180];(way["highway"]["highway"!~"motor|proposed|construction|abandoned|platform|raceway"]["foot"!~"no"]["pedestrians"!~"no"](36.89419600,-123.02460700,37.29094537,-122.56818792);>;);out;'}"
Downloaded 0.3KB from www.overpass-api.de in 0.51 seconds
Posting to http://www.overpass-api.de/api/interpreter with timeout=180, "{'data': '[out:json][timeout:180];(way["highway"]["highway"!~"motor|proposed|construction|abandoned|platform|raceway"]["foot"!~"no"]["pedestrians"!~"no"](37.29016027,-123.02460468,37.68766809,-122.56589827);>;);out;'}"
Downloaded 0.3KB from www.overpass-api.de in 0.62 seconds
Posting to http://www.overpass-api.de/api/interpreter with timeout=180, "{'data': '[out:json][timeout:180];(way["highway"]["highway"!~"motor|proposed|construction|abandoned|platform|raceway"]["foot"!~"no"]["pedestrians"!~

Downloaded 39,387.5KB from www.overpass-api.de in 5.83 seconds
Downloaded OSM network data within bounding box from Overpass API in 20 request(s) and 193.69 seconds
132,883 duplicate records removed. Took 161.88 seconds
Returning OSM data with 3,571,033 nodes and 578,039 ways...
Edge node pairs completed. Took 1,443.66 seconds
Returning processed graph with 774,016 nodes and 2,182,006 edges...
Completed OSM data download and Pandana node and edge table creation in 1,838.48 seconds
checking for low connectivity nodes...
1,965 out of 774,016 nodes (0.25 percent of total) were identified as having low connectivity and have been removed.
Completed OSM data download and graph node and edge table creation in 2,120.89 seconds


In [15]:
#Create a pedestrian network
# urbanaccess_net is bound to the module-level ua.network.ua_network object.
# The call below builds the OSM network with travel-time impedance at 3 mph
# (per the logged output); presumably it populates that same object in
# place — the cell outputs show one object address throughout.
urbanaccess_net = ua.network.ua_network
ua.osm.network.create_osm_net(osm_edges=edges,
                              osm_nodes=nodes,
                              travel_speed_mph=3)

Created OSM network with travel time impedance using a travel speed of 3 MPH. Took 0.44 seconds


<urbanaccess.network.urbanaccess_network at 0x7fe47db53d90>

In [16]:
# integrate transit and ped
# Joins the transit and OSM pedestrian networks held on urbanaccess_net; the
# logged output reports connector edges being created between the two.
# NOTE(review): headways=False presumably skips headway-based wait
# impedance — confirm against the UrbanAccess docs.


ua.network.integrate_network(urbanaccess_network=urbanaccess_net,
                             headways=False)

Loaded UrbanAccess network components comprised of:
     Transit: 18,618 nodes and 285,140 edges;
     OSM: 772,051 nodes and 2,178,060 edges
Connector edges between the OSM and transit network nodes successfully completed. Took 9.40 seconds
Fixed unicode error in name column
Edge and node tables formatted for Pandana with integer node ids: id_int, to_int, and from_int. Took 32.57 seconds
Network edge and node network integration completed successfully resulting in a total of 790,669 nodes and 2,500,436 edges:
     Transit: 18,618 nodes 285,140 edges;
     OSM: 772,051 nodes 2,178,060 edges; and
     OSM/Transit connector: 37,236 edges.


<urbanaccess.network.urbanaccess_network at 0x7fe47db53d90>

In [68]:
# Normalise location_type: blank strings (' ') and missing values become 0.
# The cleaned column is assigned back explicitly. Calling replace/fillna with
# inplace=True on an attribute-accessed column only works while that Series
# is a live view of the frame, and is a silent no-op under pandas
# copy-on-write.
urbanaccess_net.net_nodes['location_type'] = (
    urbanaccess_net.net_nodes['location_type'].replace(' ', 0).fillna(0)
)

In [72]:
# Normalise zone_id: blank strings (' ') and missing values become 0, then
# everything is cast to str so the column holds a single type.
# NOTE(review): presumably needed so the column can be written by the HDF
# 'table' export in a later cell — confirm.
# The result is assigned back in one step rather than mutated with
# inplace=True on a selected column, which is a silent no-op under pandas
# copy-on-write.
urbanaccess_net.net_nodes['zone_id'] = (
    urbanaccess_net.net_nodes['zone_id'].replace(' ', 0).fillna(0).astype(str)
)

In [79]:
# Persist the integrated network (edge and node tables) to one HDF5 file.
# The store is opened once in append mode and closed by the context manager
# even if a write fails; put(..., format='table') replaces any existing
# 'edges'/'nodes' key, so re-running the cell overwrites rather than appends.
with pd.HDFStore('transit_net_bay_area.h5', mode='a') as store:
    store.put('edges', urbanaccess_net.net_edges, format='table')
    store.put('nodes', urbanaccess_net.net_nodes, format='table')


In [100]:
# use with census
# Parcel attribute table. Later cells compare its "x" column against the
# bbox longitudes and its "y" column against the bbox latitudes.
# NOTE(review): absolute path — only resolves on the original machine.
parcels = pd.read_csv('/home/data/fall_2018/parcel_attr.csv')


In [101]:
# Keep only parcels whose coordinates fall strictly inside the Bay Area bbox.
# bbox is ordered (west lng, south lat, east lng, north lat), so its first
# element is the minimum longitude and its third the maximum.
lng_min, lat_min, lng_max, lat_max = bbox

# Rows with missing coordinates compare False and therefore count as outside.
inside_bbox = (
    (parcels["x"] > lng_min) & (parcels["x"] < lng_max)
    & (parcels["y"] > lat_min) & (parcels["y"] < lat_max)
)
outside_bbox = parcels.loc[~inside_bbox]

# Explicit copy: later cells add columns to parcels_subset.
parcels_subset = parcels.loc[inside_bbox].copy()

# Single-argument print(...) behaves the same under Python 2 and 3.
print('Total number of subset parcels: {:,}'.format(len(parcels_subset)))

Total number of subset parcels: 1,945,762


In [102]:
# Build the Pandana network from the integrated UrbanAccess node and edge
# tables, and report how long construction took.
s_time = time.time()

net_nodes_df = urbanaccess_net.net_nodes
net_edges_df = urbanaccess_net.net_edges

# twoway=False: edges are passed as directed (from_int -> to_int).
transit_ped_net = pdna.Network(
    net_nodes_df["x"],
    net_nodes_df["y"],
    net_edges_df["from_int"],
    net_edges_df["to_int"],
    net_edges_df[["weight"]],
    twoway=False)

elapsed = time.time() - s_time
print('Took {:,.2f} seconds'.format(elapsed))

Took 15.28 seconds


In [103]:
# Look up a network node id for every parcel from its x/y coordinates
# (presumably the nearest node — confirm against the Pandana docs).
parcels_subset['net_node_id'] = transit_ped_net.get_node_ids(parcels_subset['x'], parcels_subset['y'])

In [104]:
# Count jobs per parcel: jobs are counted per building, aligned with the
# buildings table on building_id, then summed by the building's parcel_id.
# NOTE(review): `jobs` is not defined in any cell visible in this notebook;
# it must be loaded before this cell for a fresh-kernel run to work.
buildings = pd.read_csv('/home/data/fall_2018/buildings_v2.csv').set_index('building_id')
jobs_per_building = jobs.groupby('building_id').size().rename('jobs')

buildings_with_jobs = pd.concat([buildings, jobs_per_building], axis=1)
jobs_by_parcel = buildings_with_jobs.groupby('parcel_id')['jobs'].sum()

In [105]:
# Attach per-parcel job counts; parcels without a match get 0 jobs.
# NOTE(review): the left key is parcels_subset's row index, which has to hold
# parcel ids to line up with 'parcel_id'. parcels was read without an
# index_col, so confirm this is the intended key.
# NOTE: this cell reassigns parcels_subset, so it must run exactly once.
parcels_subset = parcels_subset.merge(jobs_by_parcel.to_frame(), how='left',
                                      left_on=parcels_subset.index, right_on='parcel_id')

# Assign back instead of fillna(inplace=True) on a selected column, which is
# chained in-place mutation and a silent no-op under pandas copy-on-write.
parcels_subset['jobs'] = parcels_subset['jobs'].fillna(0)

In [106]:
# Register the per-parcel job counts on the network under the name 'jobs',
# located by each parcel's network node id; the aggregate calls in the next
# cell refer to this variable by name.
transit_ped_net.set(parcels_subset.net_node_id, variable = parcels_subset.jobs, name='jobs')

In [80]:
# Linearly decayed sum of 'jobs' reachable within each impedance cutoff.
# The cutoffs are in the units of the network edge weight (presumably
# minutes of travel time — confirm).
jobs_45, jobs_30, jobs_15 = [
    transit_ped_net.aggregate(cutoff, type='sum', decay='linear', name='jobs')
    for cutoff in (45, 30, 15)
]

In [110]:
# Turn each accessibility Series into a one-column frame with a distinct
# column name, so the three results can be merged side by side.
# The column is named through to_frame(name=...): DataFrame.rename does not
# accept a bare string (it is treated as an index mapper and raises).
# NOTE: this rebinds jobs_45/30/15 from Series to DataFrame, so the cell
# should run once per aggregate computation.
jobs_45 = jobs_45.to_frame(name='jobs_45')
jobs_30 = jobs_30.to_frame(name='jobs_30')
jobs_15 = jobs_15.to_frame(name='jobs_15')

In [112]:
# Attach the three accessibility columns to the parcels by matching each
# parcel's net_node_id against the node-id index of the results.
# All three merges use the same left_on/right_index form: the result frames
# carry the node id in their index, not in a 'net_node_id' column.
# NOTE: this cell reassigns parcels_subset, so it must run exactly once.
parcels_subset = (
    parcels_subset
    .merge(jobs_45, left_on='net_node_id', right_index=True, how='left')
    .merge(jobs_30, left_on='net_node_id', right_index=True, how='left')
    .merge(jobs_15, left_on='net_node_id', right_index=True, how='left')
)

In [90]:
# Export
# Write each accessibility table to its own CSV.
# NOTE(review): index=False drops the index from the files; if the index
# holds the network node ids, the CSVs cannot be joined back to anything —
# confirm this is intended.
export_targets = [
    (jobs_45, '/home/jayne/ual_model_workspace/fall-2018-models/notebooks-jayne/Py27/jobs_45.csv'),
    (jobs_30, '/home/jayne/ual_model_workspace/fall-2018-models/notebooks-jayne/Py27/jobs_30.csv'),
    (jobs_15, '/home/jayne/ual_model_workspace/fall-2018-models/notebooks-jayne/Py27/jobs_15.csv'),
]
for frame, csv_path in export_targets:
    frame.to_csv(csv_path, index=False)

In [113]:
# Write the parcel table, including the merged accessibility columns, to CSV
# without the row index.
# NOTE(review): absolute user-specific path — only resolves on the original
# machine.
parcels_subset.to_csv('/home/jayne/ual_model_workspace/fall-2018-models/notebooks-jayne/Py27/parcels_w_trn_acc.csv',index = False)